diff --git a/clang-tools-extra/clang-tidy/readability/MagicNumbersCheck.cpp b/clang-tools-extra/clang-tidy/readability/MagicNumbersCheck.cpp index 231e565f27e52..86443a155069e 100644 --- a/clang-tools-extra/clang-tidy/readability/MagicNumbersCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/MagicNumbersCheck.cpp @@ -86,15 +86,17 @@ MagicNumbersCheck::MagicNumbersCheck(StringRef Name, ClangTidyContext *Context) IgnoredDoublePointValues.reserve(IgnoredFloatingPointValuesInput.size()); for (const auto &InputValue : IgnoredFloatingPointValuesInput) { llvm::APFloat FloatValue(llvm::APFloat::IEEEsingle()); - if (!FloatValue.convertFromString(InputValue, DefaultRoundingMode)) { - assert(false && "Invalid floating point representation"); - } + auto StatusOrErr = + FloatValue.convertFromString(InputValue, DefaultRoundingMode); + assert(StatusOrErr && "Invalid floating point representation"); + consumeError(StatusOrErr.takeError()); IgnoredFloatingPointValues.push_back(FloatValue.convertToFloat()); llvm::APFloat DoubleValue(llvm::APFloat::IEEEdouble()); - if (!DoubleValue.convertFromString(InputValue, DefaultRoundingMode)) { - assert(false && "Invalid floating point representation"); - } + StatusOrErr = + DoubleValue.convertFromString(InputValue, DefaultRoundingMode); + assert(StatusOrErr && "Invalid floating point representation"); + consumeError(StatusOrErr.takeError()); IgnoredDoublePointValues.push_back(DoubleValue.convertToDouble()); } llvm::sort(IgnoredFloatingPointValues.begin(), diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp index b1eca02813b38..f11b1236803c7 100644 --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -155,7 +155,9 @@ void CommandMangler::adjust(std::vector<std::string> &Cmd) const { if (ResourceDir && !Has("-resource-dir")) Cmd.push_back(("-resource-dir=" + *ResourceDir)); - if (Sysroot && !Has("-isysroot")) { + // Don't set `-isysroot` if it is already set or if `--sysroot` is set. + // `--sysroot` is a superset of the `-isysroot` argument. + if (Sysroot && !Has("-isysroot") && !Has("--sysroot")) { Cmd.push_back("-isysroot"); Cmd.push_back(*Sysroot); } diff --git a/clang-tools-extra/clangd/Diagnostics.cpp b/clang-tools-extra/clangd/Diagnostics.cpp index e78df0322eb32..ad8f6c8bef9a1 100644 --- a/clang-tools-extra/clangd/Diagnostics.cpp +++ b/clang-tools-extra/clangd/Diagnostics.cpp @@ -22,6 +22,8 @@ #include "clang/Lex/Token.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" @@ -328,14 +330,22 @@ CodeAction toCodeAction(const Fix &F, const URIForFile &File) { void toLSPDiags( const Diag &D, const URIForFile &File, const ClangdDiagnosticOptions &Opts, llvm::function_ref<void(clangd::Diagnostic, llvm::ArrayRef<Fix>)> OutFn) { - auto FillBasicFields = [](const DiagBase &D) -> clangd::Diagnostic { - clangd::Diagnostic Res; - Res.range = D.Range; - Res.severity = getSeverity(D.Severity); - return Res; - }; + clangd::Diagnostic Main; + Main.severity = getSeverity(D.Severity); + + // Main diagnostic should always refer to a range inside the main file. If a + // diagnostic made it so far, it means that either it or one of its notes is + // inside the main file.
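A minimal illustration of the invariant described above, mirroring the unit test added to DiagnosticsTests.cpp further down (helper and field names come from that test):

  clangd::Diag D;                        // error located in an included header
  D.InsideMainFile = false;
  D.Notes.emplace_back();
  D.Notes.back().InsideMainFile = true;  // e.g. the "included here" note
  toLSPDiags(D, {}, Opts, [&](clangd::Diagnostic LSPDiag, llvm::ArrayRef<Fix>) {
    assert(LSPDiag.range == D.Notes.back().Range); // the note's range is used
  });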
+ if (D.InsideMainFile) { + Main.range = D.Range; + } else { + auto It = + llvm::find_if(D.Notes, [](const Note &N) { return N.InsideMainFile; }); + assert(It != D.Notes.end() && + "neither the main diagnostic nor notes are inside main file"); + Main.range = It->Range; + } - clangd::Diagnostic Main = FillBasicFields(D); Main.code = D.Name; switch (D.Source) { case Diag::Clang: @@ -379,7 +389,9 @@ void toLSPDiags( for (auto &Note : D.Notes) { if (!Note.InsideMainFile) continue; - clangd::Diagnostic Res = FillBasicFields(Note); + clangd::Diagnostic Res; + Res.severity = getSeverity(Note.Severity); + Res.range = Note.Range; Res.message = noteMessage(D, Note, Opts); OutFn(std::move(Res), llvm::ArrayRef<Fix>()); } diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp index b1a2e289eed79..20883b347fdc4 100644 --- a/clang-tools-extra/clangd/Hover.cpp +++ b/clang-tools-extra/clangd/Hover.cpp @@ -25,9 +25,11 @@ #include "clang/AST/PrettyPrinter.h" #include "clang/Index/IndexSymbol.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/Support/Casting.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" +#include <string> namespace clang { namespace clangd { @@ -224,8 +226,8 @@ void enhanceFromIndex(HoverInfo &Hover, const NamedDecl &ND, // Populates Type, ReturnType, and Parameters for function-like decls. void fillFunctionTypeAndParams(HoverInfo &HI, const Decl *D, - const FunctionDecl *FD, - const PrintingPolicy &Policy) { + const FunctionDecl *FD, + const PrintingPolicy &Policy) { HI.Parameters.emplace(); for (const ParmVarDecl *PVD : FD->parameters()) { HI.Parameters->emplace_back(); @@ -250,11 +252,11 @@ void fillFunctionTypeAndParams(HoverInfo &HI, const Decl *D, } } - if (const auto* CCD = llvm::dyn_cast<CXXConstructorDecl>(FD)) { + if (const auto *CCD = llvm::dyn_cast<CXXConstructorDecl>(FD)) { // Constructor's "return type" is the class type. HI.ReturnType = declaredType(CCD->getParent()).getAsString(Policy); // Don't provide any type for the constructor itself. - } else if (llvm::isa<CXXDestructorDecl>(FD)){ + } else if (llvm::isa<CXXDestructorDecl>(FD)) { HI.ReturnType = "void"; } else { HI.ReturnType = FD->getReturnType().getAsString(Policy); @@ -309,7 +311,7 @@ llvm::Optional<std::string> printExprValue(const SelectionTree::Node *N, } /// Generate a \p Hover object given the declaration \p D.
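Since getHoverContents is narrowed below from Decl to NamedDecl, a caller-side sketch looks like this (hypothetical call site, not part of the patch):

  if (const auto *ND = llvm::dyn_cast<NamedDecl>(D))
    HoverInfo HI = getHoverContents(ND, Index); // unnamed decls never hover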
-HoverInfo getHoverContents(const Decl *D, const SymbolIndex *Index) { +HoverInfo getHoverContents(const NamedDecl *D, const SymbolIndex *Index) { HoverInfo HI; const ASTContext &Ctx = D->getASTContext(); @@ -321,12 +323,10 @@ HoverInfo getHoverContents(const Decl *D, const SymbolIndex *Index) { HI.LocalScope.append("::"); PrintingPolicy Policy = printingPolicyForDecls(Ctx.getPrintingPolicy()); - if (const NamedDecl *ND = llvm::dyn_cast<NamedDecl>(D)) { - HI.Name = printName(Ctx, *ND); - ND = getDeclForComment(ND); - HI.Documentation = getDeclComment(Ctx, *ND); - enhanceFromIndex(HI, *ND, Index); - } + HI.Name = printName(Ctx, *D); + const auto *CommentD = getDeclForComment(D); + HI.Documentation = getDeclComment(Ctx, *CommentD); + enhanceFromIndex(HI, *CommentD, Index); HI.Kind = index::getSymbolInfo(D).Kind; @@ -460,34 +460,70 @@ llvm::Optional<HoverInfo> getHover(ParsedAST &AST, Position Pos, tooling::applyAllReplacements(HI->Definition, Replacements)) HI->Definition = *Formatted; - HI->SymRange = getTokenRange(AST.getSourceManager(), - AST.getLangOpts(), SourceLocationBeg); + HI->SymRange = getTokenRange(AST.getSourceManager(), AST.getLangOpts(), + SourceLocationBeg); return HI; } markup::Document HoverInfo::present() const { markup::Document Output; - if (NamespaceScope) { - auto &P = Output.addParagraph(); - P.appendText("Declared in"); - // Drop trailing "::". - if (!LocalScope.empty()) - P.appendCode(llvm::StringRef(LocalScope).drop_back(2)); - else if (NamespaceScope->empty()) - P.appendCode("global namespace"); - else - P.appendCode(llvm::StringRef(*NamespaceScope).drop_back(2)); + // Header contains text of the form: + // variable `var` : `int` + // + // class `X` + // + // function `foo` → `int` + markup::Paragraph &Header = Output.addParagraph(); + Header.appendText(index::getSymbolKindString(Kind)); + assert(!Name.empty() && "hover triggered on a nameless symbol"); + Header.appendCode(Name); + if (ReturnType) { + Header.appendText("→"); + Header.appendCode(*ReturnType); + } else if (Type) { + Header.appendText(":"); + Header.appendCode(*Type); } - if (!Definition.empty()) { - Output.addCodeBlock(Definition); - } else { - // Builtin types - Output.addCodeBlock(Name); + // For functions we display the signature in a list form, e.g.: + // - `bool param1` + // - `int param2 = 5` + if (Parameters && !Parameters->empty()) { + markup::BulletList &L = Output.addBulletList(); + for (const auto &Param : *Parameters) { + std::string Buffer; + llvm::raw_string_ostream OS(Buffer); + OS << Param; + L.addItem().addParagraph().appendCode(std::move(OS.str())); + } + } + + if (Value) { + markup::Paragraph &P = Output.addParagraph(); + P.appendText("Value ="); + P.appendCode(*Value); } if (!Documentation.empty()) Output.addParagraph().appendText(Documentation); + + if (!Definition.empty()) { + std::string ScopeComment; + // Drop trailing "::". + if (!LocalScope.empty()) { + // Container name, e.g. class, method, function. + // We might want to propagate some info about the container type to print + // function foo, class X, method X::bar, etc. + ScopeComment = + "// In " + llvm::StringRef(LocalScope).rtrim(':').str() + '\n'; + } else if (NamespaceScope && !NamespaceScope->empty()) { + ScopeComment = "// In namespace " + + llvm::StringRef(*NamespaceScope).rtrim(':').str() + '\n'; + } + // Note that we don't print anything for the global namespace, to avoid + // annoying non-C++ projects or projects that are not making use of + // namespaces.
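For example, the emitted code block for a function in namespace ns reads as follows (this matches the expected render in the hover tests below):

  // In namespace ns
  ret_type foo(params) {}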
+ Output.addCodeBlock(ScopeComment + Definition); + } return Output; } diff --git a/clang-tools-extra/clangd/test/hover.test b/clang-tools-extra/clangd/test/hover.test index e45164b346ea5..2162ff9abcdc3 100644 --- a/clang-tools-extra/clangd/test/hover.test +++ b/clang-tools-extra/clangd/test/hover.test @@ -9,7 +9,7 @@ # CHECK-NEXT: "result": { # CHECK-NEXT: "contents": { # CHECK-NEXT: "kind": "plaintext", -# CHECK-NEXT: "value": "Declared in global namespace\n\nvoid foo()" +# CHECK-NEXT: "value": "function foo → void\n\nvoid foo()" # CHECK-NEXT: }, # CHECK-NEXT: "range": { # CHECK-NEXT: "end": { @@ -37,7 +37,7 @@ # CHECK-NEXT: "result": { # CHECK-NEXT: "contents": { # CHECK-NEXT: "kind": "plaintext", -# CHECK-NEXT: "value": "Declared in global namespace\n\nenum foo {}" +# CHECK-NEXT: "value": "enum foo\n\nenum foo {}" # CHECK-NEXT: }, # CHECK-NEXT: "range": { # CHECK-NEXT: "end": { diff --git a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp index 0941af25213ca..ef73519ef1385 100644 --- a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp +++ b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp @@ -1014,6 +1014,29 @@ TEST(IgnoreDiags, FromNonWrittenInclude) { EXPECT_THAT(TU.build().getDiagnostics(), UnorderedElementsAre()); } +TEST(ToLSPDiag, RangeIsInMain) { + ClangdDiagnosticOptions Opts; + clangd::Diag D; + D.Range = {pos(1, 2), pos(3, 4)}; + D.Notes.emplace_back(); + Note &N = D.Notes.back(); + N.Range = {pos(2, 3), pos(3, 4)}; + + D.InsideMainFile = true; + N.InsideMainFile = false; + toLSPDiags(D, {}, Opts, + [&](clangd::Diagnostic LSPDiag, ArrayRef) { + EXPECT_EQ(LSPDiag.range, D.Range); + }); + + D.InsideMainFile = false; + N.InsideMainFile = true; + toLSPDiags(D, {}, Opts, + [&](clangd::Diagnostic LSPDiag, ArrayRef) { + EXPECT_EQ(LSPDiag.range, N.Range); + }); +} + } // namespace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp index 4c6d2abbd24a1..44337688ff87a 100644 --- a/clang-tools-extra/clangd/unittests/HoverTests.cpp +++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp @@ -1606,6 +1606,95 @@ TEST(Hover, DocsFromMostSpecial) { } } } +TEST(Hover, Present) { + struct { + const std::function Builder; + llvm::StringRef ExpectedRender; + } Cases[] = { + { + [](HoverInfo &HI) { + HI.Kind = index::SymbolKind::Unknown; + HI.Name = "X"; + }, + R"( X)", + }, + { + [](HoverInfo &HI) { + HI.Kind = index::SymbolKind::NamespaceAlias; + HI.Name = "foo"; + }, + R"(namespace-alias foo)", + }, + { + [](HoverInfo &HI) { + HI.Kind = index::SymbolKind::Class; + HI.TemplateParameters = { + {std::string("typename"), std::string("T"), llvm::None}, + {std::string("typename"), std::string("C"), + std::string("bool")}, + }; + HI.Documentation = "documentation"; + HI.Definition = + "template class Foo {}"; + HI.Name = "foo"; + HI.NamespaceScope.emplace(); + }, + R"(class foo +documentation + +template class Foo {})", + }, + { + [](HoverInfo &HI) { + HI.Kind = index::SymbolKind::Function; + HI.Name = "foo"; + HI.Type = "type"; + HI.ReturnType = "ret_type"; + HI.Parameters.emplace(); + HoverInfo::Param P; + HI.Parameters->push_back(P); + P.Type = "type"; + HI.Parameters->push_back(P); + P.Name = "foo"; + HI.Parameters->push_back(P); + P.Default = "default"; + HI.Parameters->push_back(P); + HI.NamespaceScope = "ns::"; + HI.Definition = "ret_type foo(params) {}"; + }, + R"(function foo → ret_type +- +- type +- type foo +- 
type foo = default + +// In namespace ns +ret_type foo(params) {})", + }, + { + [](HoverInfo &HI) { + HI.Kind = index::SymbolKind::Variable; + HI.LocalScope = "test::bar::"; + HI.Value = "value"; + HI.Name = "foo"; + HI.Type = "type"; + HI.Definition = "def"; + }, + R"(variable foo : type +Value = value + +// In test::bar +def)", + }, + }; + + for (const auto &C : Cases) { + HoverInfo HI; + C.Builder(HI); + EXPECT_EQ(HI.present().asPlainText(), C.ExpectedRender); + } +} + } // namespace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability-misleading-indentation.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability-misleading-indentation.cpp index c3bd33d8ee7b8..aea0618d120db 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability-misleading-indentation.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability-misleading-indentation.cpp @@ -1,4 +1,4 @@ -// RUN: %check_clang_tidy %s readability-misleading-indentation %t +// RUN: %check_clang_tidy %s readability-misleading-indentation %t -- -- -fno-delayed-template-parsing void foo1(); void foo2(); @@ -168,6 +168,17 @@ void mustFailNonTemplate() { // CHECK-MESSAGES: :[[@LINE-2]]:5: warning: different indentation for 'if' and corresponding 'else' [readability-misleading-indentation] } +template +void mustFailNoInsta() { + if constexpr (b) { + foo1(); + } + else { + foo2(); + // CHECK-MESSAGES: :[[@LINE-2]]:5: warning: different indentation for 'if' and corresponding 'else' [readability-misleading-indentation] + } +} + template void mustPassNoInsta() { if constexpr (b) { diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index f55ffccc84d05..856d5e34bbcc2 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2661,7 +2661,8 @@ This will produce a generic test.bc file that can be used in vendor toolchains to perform machine code generation. Clang currently supports OpenCL C language standards up to v2.0. Starting from -clang 9 a C++ mode is available for OpenCL (see :ref:`C++ for OpenCL `). +clang 9 a C++ mode is available for OpenCL (see +:ref:`C++ for OpenCL `). OpenCL Specific Options ----------------------- @@ -3024,7 +3025,7 @@ There are some standard OpenCL functions that are implemented as Clang builtins: enqueue query functions from `section 6.13.17.5 `_. -.. _opencl_cpp: +.. 
_cxx_for_opencl: C++ for OpenCL -------------- diff --git a/clang/include/clang/AST/ASTLambda.h b/clang/include/clang/AST/ASTLambda.h index c1153168e41bb..6fd82d6af4908 100644 --- a/clang/include/clang/AST/ASTLambda.h +++ b/clang/include/clang/AST/ASTLambda.h @@ -64,6 +64,17 @@ inline bool isGenericLambdaCallOperatorSpecialization(DeclContext *DC) { dyn_cast(DC)); } +inline bool isGenericLambdaCallOperatorOrStaticInvokerSpecialization( + DeclContext *DC) { + CXXMethodDecl *MD = dyn_cast(DC); + if (!MD) return false; + const CXXRecordDecl *LambdaClass = MD->getParent(); + if (LambdaClass && LambdaClass->isGenericLambda()) + return (isLambdaCallOperator(MD) || MD->isLambdaStaticInvoker()) && + MD->isFunctionTemplateSpecialization(); + return false; +} + // This returns the parent DeclContext ensuring that the correct // parent DeclContext is returned for Lambdas diff --git a/clang/include/clang/AST/ASTNodeTraverser.h b/clang/include/clang/AST/ASTNodeTraverser.h index 9dab814b659ba..a672d92695da9 100644 --- a/clang/include/clang/AST/ASTNodeTraverser.h +++ b/clang/include/clang/AST/ASTNodeTraverser.h @@ -384,6 +384,9 @@ class ASTNodeTraverser for (const auto *Parameter : D->parameters()) Visit(Parameter); + if (const Expr *TRC = D->getTrailingRequiresClause()) + Visit(TRC); + if (const auto *C = dyn_cast(D)) for (const auto *I : C->inits()) Visit(I); diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index cd97c6dcf8d5c..002d1434b1cbd 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -669,10 +669,12 @@ struct QualifierInfo { /// Represents a ValueDecl that came out of a declarator. /// Contains type source information through TypeSourceInfo. class DeclaratorDecl : public ValueDecl { - // A struct representing both a TInfo and a syntactic qualifier, - // to be used for the (uncommon) case of out-of-line declarations. + // A struct representing a TInfo, a trailing requires-clause and a syntactic + // qualifier, to be used for the (uncommon) case of out-of-line declarations + // and constrained function decls. struct ExtInfo : public QualifierInfo { TypeSourceInfo *TInfo; + Expr *TrailingRequiresClause = nullptr; }; llvm::PointerUnion DeclInfo; @@ -739,6 +741,21 @@ class DeclaratorDecl : public ValueDecl { void setQualifierInfo(NestedNameSpecifierLoc QualifierLoc); + /// \brief Get the constraint-expression introduced by the trailing + /// requires-clause in the function/member declaration, or null if no + /// requires-clause was provided. + Expr *getTrailingRequiresClause() { + return hasExtInfo() ? getExtInfo()->TrailingRequiresClause + : nullptr; + } + + const Expr *getTrailingRequiresClause() const { + return hasExtInfo() ? getExtInfo()->TrailingRequiresClause + : nullptr; + } + + void setTrailingRequiresClause(Expr *TrailingRequiresClause); + unsigned getNumTemplateParameterLists() const { return hasExtInfo() ? 
getExtInfo()->NumTemplParamLists : 0; } @@ -1903,7 +1920,8 @@ class FunctionDecl : public DeclaratorDecl, FunctionDecl(Kind DK, ASTContext &C, DeclContext *DC, SourceLocation StartLoc, const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, StorageClass S, bool isInlineSpecified, - ConstexprSpecKind ConstexprKind); + ConstexprSpecKind ConstexprKind, + Expr *TrailingRequiresClause = nullptr); using redeclarable_base = Redeclarable; @@ -1938,11 +1956,12 @@ class FunctionDecl : public DeclaratorDecl, SourceLocation NLoc, DeclarationName N, QualType T, TypeSourceInfo *TInfo, StorageClass SC, bool isInlineSpecified = false, bool hasWrittenPrototype = true, - ConstexprSpecKind ConstexprKind = CSK_unspecified) { + ConstexprSpecKind ConstexprKind = CSK_unspecified, + Expr *TrailingRequiresClause = nullptr) { DeclarationNameInfo NameInfo(N, NLoc); return FunctionDecl::Create(C, DC, StartLoc, NameInfo, T, TInfo, SC, isInlineSpecified, hasWrittenPrototype, - ConstexprKind); + ConstexprKind, TrailingRequiresClause); } static FunctionDecl *Create(ASTContext &C, DeclContext *DC, @@ -1950,7 +1969,8 @@ class FunctionDecl : public DeclaratorDecl, const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, StorageClass SC, bool isInlineSpecified, bool hasWrittenPrototype, - ConstexprSpecKind ConstexprKind); + ConstexprSpecKind ConstexprKind, + Expr *TrailingRequiresClause); static FunctionDecl *CreateDeserialized(ASTContext &C, unsigned ID); @@ -2352,6 +2372,17 @@ class FunctionDecl : public DeclaratorDecl, /// the target functionality. bool isTargetMultiVersion() const; + /// \brief Get the associated-constraints of this function declaration. + /// Currently, this will either be a vector of size 1 containing the + /// trailing-requires-clause or an empty vector. + /// + /// Use this instead of getTrailingRequiresClause for concepts APIs that + /// accept an ArrayRef of constraint expressions. 
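A hedged sketch of the intended call pattern (the call site is illustrative; the declaration follows):

  llvm::SmallVector<const Expr *, 1> AC;
  FD->getAssociatedConstraints(AC);
  // AC is either empty or holds exactly the trailing requires-clause, and can
  // be handed to constraint APIs such as Sema::CheckConstraintSatisfaction.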
+ void getAssociatedConstraints(SmallVectorImpl &AC) const { + if (auto *TRC = getTrailingRequiresClause()) + AC.push_back(TRC); + } + void setPreviousDeclaration(FunctionDecl * PrevDecl); FunctionDecl *getCanonicalDecl() override; diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index 1c6f99438fc3e..aba33e383976c 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -1905,9 +1905,10 @@ class CXXMethodDecl : public FunctionDecl { SourceLocation StartLoc, const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, StorageClass SC, bool isInline, ConstexprSpecKind ConstexprKind, - SourceLocation EndLocation) + SourceLocation EndLocation, + Expr *TrailingRequiresClause = nullptr) : FunctionDecl(DK, C, RD, StartLoc, NameInfo, T, TInfo, SC, isInline, - ConstexprKind) { + ConstexprKind, TrailingRequiresClause) { if (EndLocation.isValid()) setRangeEnd(EndLocation); } @@ -1918,7 +1919,8 @@ class CXXMethodDecl : public FunctionDecl { const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, StorageClass SC, bool isInline, ConstexprSpecKind ConstexprKind, - SourceLocation EndLocation); + SourceLocation EndLocation, + Expr *TrailingRequiresClause = nullptr); static CXXMethodDecl *CreateDeserialized(ASTContext &C, unsigned ID); @@ -2363,7 +2365,8 @@ class CXXConstructorDecl final const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, ExplicitSpecifier ES, bool isInline, bool isImplicitlyDeclared, ConstexprSpecKind ConstexprKind, - InheritedConstructor Inherited); + InheritedConstructor Inherited, + Expr *TrailingRequiresClause); void anchor() override; @@ -2416,7 +2419,8 @@ class CXXConstructorDecl final const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, ExplicitSpecifier ES, bool isInline, bool isImplicitlyDeclared, ConstexprSpecKind ConstexprKind, - InheritedConstructor Inherited = InheritedConstructor()); + InheritedConstructor Inherited = InheritedConstructor(), + Expr *TrailingRequiresClause = nullptr); ExplicitSpecifier getExplicitSpecifier() { return getCanonicalDecl()->getExplicitSpecifierInternal(); @@ -2623,9 +2627,11 @@ class CXXDestructorDecl : public CXXMethodDecl { CXXDestructorDecl(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc, const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, bool isInline, - bool isImplicitlyDeclared, ConstexprSpecKind ConstexprKind) + bool isImplicitlyDeclared, ConstexprSpecKind ConstexprKind, + Expr *TrailingRequiresClause = nullptr) : CXXMethodDecl(CXXDestructor, C, RD, StartLoc, NameInfo, T, TInfo, - SC_None, isInline, ConstexprKind, SourceLocation()) { + SC_None, isInline, ConstexprKind, SourceLocation(), + TrailingRequiresClause) { setImplicit(isImplicitlyDeclared); } @@ -2637,7 +2643,8 @@ class CXXDestructorDecl : public CXXMethodDecl { const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, bool isInline, bool isImplicitlyDeclared, - ConstexprSpecKind ConstexprKind); + ConstexprSpecKind ConstexprKind, + Expr *TrailingRequiresClause = nullptr); static CXXDestructorDecl *CreateDeserialized(ASTContext & C, unsigned ID); void setOperatorDelete(FunctionDecl *OD, Expr *ThisArg); @@ -2676,9 +2683,11 @@ class CXXConversionDecl : public CXXMethodDecl { CXXConversionDecl(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc, const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, bool isInline, ExplicitSpecifier ES, - ConstexprSpecKind ConstexprKind, SourceLocation 
EndLocation) + ConstexprSpecKind ConstexprKind, SourceLocation EndLocation, + Expr *TrailingRequiresClause = nullptr) : CXXMethodDecl(CXXConversion, C, RD, StartLoc, NameInfo, T, TInfo, - SC_None, isInline, ConstexprKind, EndLocation), + SC_None, isInline, ConstexprKind, EndLocation, + TrailingRequiresClause), ExplicitSpec(ES) {} void anchor() override; @@ -2694,7 +2703,7 @@ class CXXConversionDecl : public CXXMethodDecl { Create(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc, const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, bool isInline, ExplicitSpecifier ES, ConstexprSpecKind ConstexprKind, - SourceLocation EndLocation); + SourceLocation EndLocation, Expr *TrailingRequiresClause = nullptr); static CXXConversionDecl *CreateDeserialized(ASTContext &C, unsigned ID); ExplicitSpecifier getExplicitSpecifier() { diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 19dd62b0fe0fd..144ef221d6920 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -2030,6 +2030,11 @@ bool RecursiveASTVisitor::TraverseFunctionHelper(FunctionDecl *D) { } } + // Visit the trailing requires clause, if any. + if (Expr *TrailingRequiresClause = D->getTrailingRequiresClause()) { + TRY_TO(TraverseStmt(TrailingRequiresClause)); + } + if (CXXConstructorDecl *Ctor = dyn_cast(D)) { // Constructor initializers. for (auto *I : Ctor->inits()) { diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index af7fb92d230d6..cae44fb274a4a 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -178,8 +178,8 @@ class FunctionArgument : Argument; class NamedArgument : Argument; + opt, + fake>; class TypeArgument : Argument; class UnsignedArgument : Argument; class VariadicUnsignedArgument : Argument; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 997a3c3d84dcc..71f2ea8cd0df5 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -4689,7 +4689,7 @@ below. The explicit attribute annotation indicates that the third parameter (`start_routine`) is called zero or more times by the `pthread_create` function, and that the fourth parameter (`arg`) is passed along. Note that the callback behavior of `pthread_create` is automatically recognized by Clang. In addition, -the declarations of `__kmpc_fork_teams` and `__kmpc_fork_call`, generated for +the declarations of `__kmpc_fork_teams` and `__kmpc_fork_call`, generated for `#pragma omp target teams` and `#pragma omp parallel`, respectively, are also automatically recognized as broker functions. Further functions might be added in the future. 
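For reference, the annotation discussed above looks roughly like this (signature abbreviated; the attribute arguments name the callback callee and the payload forwarded to it):

  __attribute__((callback(start_routine, arg)))
  int pthread_create(pthread_t *thread, const pthread_attr_t *attr,
                     void *(*start_routine)(void *), void *arg);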
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index c8c1d73cb5d9e..a4f8300aa38e1 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -984,6 +984,7 @@ LIBBUILTIN(longjmp, "vJi", "fr", "setjmp.h", ALL_LANGUAGES) LIBBUILTIN(alloca, "v*z", "f", "stdlib.h", ALL_GNU_LANGUAGES) // POSIX string.h LIBBUILTIN(memccpy, "v*v*vC*iz", "f", "string.h", ALL_GNU_LANGUAGES) +LIBBUILTIN(mempcpy, "v*v*vC*z", "f", "string.h", ALL_GNU_LANGUAGES) LIBBUILTIN(stpcpy, "c*c*cC*", "f", "string.h", ALL_GNU_LANGUAGES) LIBBUILTIN(stpncpy, "c*c*cC*z", "f", "string.h", ALL_GNU_LANGUAGES) LIBBUILTIN(strdup, "c*cC*", "f", "string.h", ALL_GNU_LANGUAGES) diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 0a8484d983f33..0d9d31670157d 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -181,6 +181,13 @@ def err_function_declared_typedef : Error< def err_at_defs_cxx : Error<"@defs is not supported in Objective-C++">; def err_at_in_class : Error<"unexpected '@' in member specification">; def err_unexpected_semi : Error<"unexpected ';' before %0">; +def err_unparenthesized_non_primary_expr_in_requires_clause : Error< + "parentheses are required around this expression in a requires clause">; +def note_unparenthesized_non_primary_expr_in_requires_clause : Note< + "parentheses are required around this expression in a requires clause">; +def err_potential_function_call_in_constraint_logical_or : Error< + "function call must be parenthesized to be considered part of the requires " + "clause">; def err_expected_fn_body : Error< "expected function body after function declarator">; @@ -309,6 +316,12 @@ def err_init_list_bin_op : Error<"initializer list cannot be used on the " def warn_cxx98_compat_trailing_return_type : Warning< "trailing return types are incompatible with C++98">, InGroup, DefaultIgnore; +def err_requires_clause_must_appear_after_trailing_return : Error< + "trailing return type must appear before trailing requires clause">; +def err_requires_clause_on_declarator_not_declaring_a_function : Error< + "trailing requires clause can only be used when declaring a function">; +def err_requires_clause_inside_parens : Error< + "trailing requires clause should be placed outside parentheses">; def ext_auto_storage_class : ExtWarn< "'auto' storage class specifier is not permitted in C++11, and will not " "be supported in future releases">, InGroup>; @@ -880,7 +893,7 @@ def warn_cxx98_compat_lambda : Warning< InGroup, DefaultIgnore; def err_lambda_missing_parens : Error< "lambda requires '()' before %select{'mutable'|return type|" - "attribute specifier|'constexpr'|'consteval'}0">; + "attribute specifier|'constexpr'|'consteval'|'requires' clause}0">; def err_lambda_decl_specifier_repeated : Error< "%select{'mutable'|'constexpr'|'consteval'}0 cannot appear multiple times in a lambda declarator">; def err_lambda_capture_misplaced_ellipsis : Error< diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 5fa7d23c567cc..f82cdadd9ff47 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -2618,6 +2618,18 @@ def note_single_arg_concept_specialization_constraint_evaluated_to_false : Note< "%select{and |because }0%1 does not satisfy %2">; def 
note_atomic_constraint_evaluated_to_false_elaborated : Note< "%select{and |because }0'%1' (%2 %3 %4) evaluated to false">; +def err_constrained_virtual_method : Error< + "virtual function cannot have a requires clause">; +def err_trailing_requires_clause_on_deduction_guide : Error< + "deduction guide cannot have a requires clause">; +def err_reference_to_function_with_unsatisfied_constraints : Error< + "invalid reference to function %0: constraints not satisfied">; +def note_ambiguous_atomic_constraints : Note< + "similar constraint expressions not considered equivalent; constraint " + "expressions cannot be considered equivalent unless they originate from the " + "same concept">; +def note_ambiguous_atomic_constraints_similar_expression : Note< + "similar constraint expression here">; def err_template_different_requires_clause : Error< "requires clause differs in template redeclaration">; @@ -3952,6 +3964,9 @@ def note_ovl_candidate_disabled_by_extension : Note< def err_addrof_function_disabled_by_enable_if_attr : Error< "cannot take address of function %0 because it has one or more " "non-tautological enable_if conditions">; +def err_addrof_function_constraints_not_satisfied : Error< + "cannot take address of function %0 because its constraints are not " + "satisfied">; def note_addrof_ovl_candidate_disabled_by_enable_if_attr : Note< "candidate function made ineligible by enable_if">; def note_ovl_candidate_deduced_mismatch : Note< @@ -4065,6 +4080,9 @@ def note_ovl_candidate_bad_target : Note< "call to " "%select{__device__|__global__|__host__|__host__ __device__|invalid}3 function from" " %select{__device__|__global__|__host__|__host__ __device__|invalid}4 function">; +def note_ovl_candidate_constraints_not_satisfied : Note< + "candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: constraints " + "not satisfied">; def note_implicit_member_target_infer_collision : Note< "implicit %sub{select_special_member_kind}0 inferred target collision: call to both " "%select{__device__|__global__|__host__|__host__ __device__}1 and " diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 87091a3250715..86a04e33ce760 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -684,7 +684,7 @@ let params = [s16, s32], pnt = PNT_NType in { defm vqrshrun : VSHRN; } let params = T.Int, pnt = PNT_NType in { - defm vsli : DyadicImmShift; + defm vsli : DyadicImmShift; defm vsri : DyadicImmShift; } diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index af5b281636fe9..ec7641ffbc52c 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -1679,6 +1679,9 @@ class Parser : public CodeCompletionHandler { ExprResult ParseConstantExpression(TypeCastState isTypeCast = NotTypeCast); ExprResult ParseCaseExpression(SourceLocation CaseLoc); ExprResult ParseConstraintExpression(); + ExprResult + ParseConstraintLogicalAndExpression(bool IsTrailingRequiresClause); + ExprResult ParseConstraintLogicalOrExpression(bool IsTrailingRequiresClause); // Expr that doesn't include commas. ExprResult ParseAssignmentExpression(TypeCastState isTypeCast = NotTypeCast); @@ -1693,15 +1696,23 @@ class Parser : public CodeCompletionHandler { ExprResult ParseRHSOfBinaryExpression(ExprResult LHS, prec::Level MinPrec); - ExprResult ParseCastExpression(bool isUnaryExpression, + /// Control what ParseCastExpression will parse. 
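Call sites migrate from the old boolean to the enum, e.g. (sketch; the old form shown for contrast):

  // before: ExprResult Res = ParseCastExpression(/*isUnaryExpression=*/false);
  ExprResult Res = ParseCastExpression(AnyCastExpr);
  // PrimaryExprOnly lets the requires-clause parser accept only
  // primary-expressions as atomic constraints, matching the new
  // parenthesization diagnostics added above.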
+ enum CastParseKind { + AnyCastExpr = 0, + UnaryExprOnly, + PrimaryExprOnly + }; + ExprResult ParseCastExpression(CastParseKind ParseKind, bool isAddressOfOperand, bool &NotCastExpr, TypeCastState isTypeCast, - bool isVectorLiteral = false); - ExprResult ParseCastExpression(bool isUnaryExpression, + bool isVectorLiteral = false, + bool *NotPrimaryExpression = nullptr); + ExprResult ParseCastExpression(CastParseKind ParseKind, bool isAddressOfOperand = false, TypeCastState isTypeCast = NotTypeCast, - bool isVectorLiteral = false); + bool isVectorLiteral = false, + bool *NotPrimaryExpression = nullptr); /// Returns true if the next token cannot start an expression. bool isNotExpressionStart(); @@ -1910,6 +1921,11 @@ class Parser : public CodeCompletionHandler { ExprResult ParseCoyieldExpression(); + //===--------------------------------------------------------------------===// + // C++ Concepts + + void ParseTrailingRequiresClause(Declarator &D); + //===--------------------------------------------------------------------===// // C99 6.7.8: Initialization. @@ -2739,6 +2755,9 @@ class Parser : public CodeCompletionHandler { BalancedDelimiterTracker &Tracker, bool IsAmbiguous, bool RequiresArg = false); + void InitCXXThisScopeForDeclaratorIfRelevant( + const Declarator &D, const DeclSpec &DS, + llvm::Optional &ThisScope); bool ParseRefQualifier(bool &RefQualifierIsLValueRef, SourceLocation &RefQualifierLoc); bool isFunctionDeclaratorIdentifierList(); @@ -2846,10 +2865,11 @@ class Parser : public CodeCompletionHandler { Decl *TagDecl); ExprResult ParseCXXMemberInitializer(Decl *D, bool IsFunction, SourceLocation &EqualLoc); - bool ParseCXXMemberDeclaratorBeforeInitializer(Declarator &DeclaratorInfo, - VirtSpecifiers &VS, - ExprResult &BitfieldSize, - LateParsedAttrList &LateAttrs); + bool + ParseCXXMemberDeclaratorBeforeInitializer(Declarator &DeclaratorInfo, + VirtSpecifiers &VS, + ExprResult &BitfieldSize, + LateParsedAttrList &LateAttrs); void MaybeParseAndDiagnoseDeclSpecAfterCXX11VirtSpecifierSeq(Declarator &D, VirtSpecifiers &VS); DeclGroupPtrTy ParseCXXClassMemberDeclaration( diff --git a/clang/include/clang/Sema/DeclSpec.h b/clang/include/clang/Sema/DeclSpec.h index e3ead60bb43f6..aceec9cbe1c9e 100644 --- a/clang/include/clang/Sema/DeclSpec.h +++ b/clang/include/clang/Sema/DeclSpec.h @@ -1826,6 +1826,10 @@ class Declarator { /// The asm label, if specified. Expr *AsmLabel; + /// \brief The constraint-expression specified by the trailing + /// requires-clause, or null if no such clause was specified. + Expr *TrailingRequiresClause; + #ifndef _MSC_VER union { #endif @@ -1855,7 +1859,8 @@ class Declarator { GroupingParens(false), FunctionDefinition(FDK_Declaration), Redeclaration(false), Extension(false), ObjCIvar(false), ObjCWeakProperty(false), InlineStorageUsed(false), - Attrs(ds.getAttributePool().getFactory()), AsmLabel(nullptr) {} + Attrs(ds.getAttributePool().getFactory()), AsmLabel(nullptr), + TrailingRequiresClause(nullptr) {} ~Declarator() { clear(); @@ -2401,6 +2406,22 @@ class Declarator { return false; } + /// \brief Sets a trailing requires clause for this declarator. + void setTrailingRequiresClause(Expr *TRC) { + TrailingRequiresClause = TRC; + } + + /// \brief Sets a trailing requires clause for this declarator. + Expr *getTrailingRequiresClause() { + return TrailingRequiresClause; + } + + /// \brief Determine whether a trailing requires clause was written in this + /// declarator. 
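For illustration, a Sema-side consumer might guard on it like this (hypothetical call site, using a diagnostic added in this patch):

  if (D.hasTrailingRequiresClause() && !D.isFunctionDeclarator())
    Diag(D.getTrailingRequiresClause()->getBeginLoc(),
         diag::err_requires_clause_on_declarator_not_declaring_a_function);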
+ bool hasTrailingRequiresClause() const { + return TrailingRequiresClause != nullptr; + } + /// takeAttributes - Takes attributes from the given parsed-attributes /// set and add them to this declarator. /// diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h index e0c3ba13ef543..0ccb658c6a771 100644 --- a/clang/include/clang/Sema/Overload.h +++ b/clang/include/clang/Sema/Overload.h @@ -754,7 +754,11 @@ class Sema; /// This constructor/conversion candidate fail due to an address space /// mismatch between the object being constructed and the overload /// candidate. - ovl_fail_object_addrspace_mismatch + ovl_fail_object_addrspace_mismatch, + + /// This candidate was not viable because its associated constraints were + /// not satisfied. + ovl_fail_constraints_not_satisfied, }; /// A list of implicit conversion sequences for the arguments of an diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index e712745c963fb..cb436d8c9ed23 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -47,6 +47,7 @@ #include "clang/Sema/ObjCMethodList.h" #include "clang/Sema/Ownership.h" #include "clang/Sema/Scope.h" +#include "clang/Sema/SemaConcept.h" #include "clang/Sema/TypoCorrection.h" #include "clang/Sema/Weak.h" #include "llvm/ADT/ArrayRef.h" @@ -2428,6 +2429,8 @@ class Sema final { SkipBodyInfo *SkipBody = nullptr); Decl *ActOnStartOfFunctionDef(Scope *S, Decl *D, SkipBodyInfo *SkipBody = nullptr); + void ActOnStartTrailingRequiresClause(Scope *S, Declarator &D); + ExprResult ActOnFinishTrailingRequiresClause(ExprResult ConstraintExpr); void ActOnStartOfObjCMethodDef(Scope *S, Decl *D); bool isObjCMethodDecl(Decl *D) { return D && isa(D); @@ -3012,7 +3015,8 @@ class Sema final { NamedDecl *&OldDecl, bool IsForUsingDecl); bool IsOverload(FunctionDecl *New, FunctionDecl *Old, bool IsForUsingDecl, - bool ConsiderCudaAttrs = true); + bool ConsiderCudaAttrs = true, + bool ConsiderRequiresClauses = true); ImplicitConversionSequence TryImplicitConversion(Expr *From, QualType ToType, @@ -3377,10 +3381,9 @@ class Sema final { bool *pHadMultipleCandidates = nullptr); FunctionDecl * - resolveAddressOfOnlyViableOverloadCandidate(Expr *E, - DeclAccessPair &FoundResult); + resolveAddressOfSingleOverloadCandidate(Expr *E, DeclAccessPair &FoundResult); - bool resolveAndFixAddressOfOnlyViableOverloadCandidate( + bool resolveAndFixAddressOfSingleOverloadCandidate( ExprResult &SrcExpr, bool DoFunctionPointerConversion = false); FunctionDecl * @@ -6172,7 +6175,8 @@ class Sema final { TypeSourceInfo *MethodType, SourceLocation EndLoc, ArrayRef Params, - ConstexprSpecKind ConstexprKind); + ConstexprSpecKind ConstexprKind, + Expr *TrailingRequiresClause); /// Number lambda for linkage purposes if necessary. void handleLambdaNumbering( @@ -6306,16 +6310,35 @@ class Sema final { Expr *Src); /// Check whether the given expression is a valid constraint expression. - /// A diagnostic is emitted if it is not, and false is returned. - bool CheckConstraintExpression(Expr *CE); + /// A diagnostic is emitted if it is not, false is returned, and + /// PossibleNonPrimary will be set to true if the failure might be due to a + /// non-primary expression being used as an atomic constraint. 
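A sketch of the expected parser-side use of the new out-parameter (the call shape is assumed, not taken from the patch):

  bool PossibleNonPrimary = false;
  if (!Actions.CheckConstraintExpression(E, Tok, &PossibleNonPrimary,
                                         /*IsTrailingRequiresClause=*/true) &&
      PossibleNonPrimary)
    Diag(E->getBeginLoc(),
         diag::err_unparenthesized_non_primary_expr_in_requires_clause);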
+ bool CheckConstraintExpression(Expr *CE, Token NextToken = Token(), + bool *PossibleNonPrimary = nullptr, + bool IsTrailingRequiresClause = false); + + /// Check whether the given type-dependent expression will be the name of a + /// function or another callable function-like entity (e.g. a function + // template or overload set) for any substitution. + bool IsDependentFunctionNameExpr(Expr *E); private: - /// \brief Caches pairs of template-like decls whose associated constraints - /// were checked for subsumption and whether or not the first's constraints - /// did in fact subsume the second's. + /// Caches pairs of template-like decls whose associated constraints were + /// checked for subsumption and whether or not the first's constraints did in + /// fact subsume the second's. llvm::DenseMap, bool> SubsumptionCache; + /// Caches the normalized associated constraints of declarations (concepts or + /// constrained declarations). If an error occurred while normalizing the + /// associated constraints of the template or concept, nullptr will be cached + /// here. + llvm::DenseMap + NormalizationCache; public: + const NormalizedConstraint * + getNormalizedAssociatedConstraints( + NamedDecl *ConstrainedDecl, ArrayRef AssociatedConstraints); + /// \brief Check whether the given declaration's associated constraints are /// at least as constrained than another declaration's according to the /// partial ordering of constraints. @@ -6328,6 +6351,13 @@ class Sema final { NamedDecl *D2, ArrayRef AC2, bool &Result); + /// If D1 was not at least as constrained as D2, but would've been if a pair + /// of atomic constraints involved had been declared in a concept and not + /// repeated in two separate places in code. + /// \returns true if such a diagnostic was emitted, false otherwise. + bool MaybeEmitAmbiguousAtomicConstraintsDiagnostic(NamedDecl *D1, + ArrayRef AC1, NamedDecl *D2, ArrayRef AC2); + /// \brief Check whether the given list of constraint expressions are /// satisfied (as if in a 'conjunction') given template arguments. /// \param ConstraintExprs a list of constraint expressions, treated as if @@ -8781,6 +8811,10 @@ class Sema final { void InstantiateExceptionSpec(SourceLocation PointOfInstantiation, FunctionDecl *Function); + bool CheckInstantiatedFunctionTemplateConstraints( + SourceLocation PointOfInstantiation, FunctionDecl *Decl, + ArrayRef TemplateArgs, + ConstraintSatisfaction &Satisfaction); FunctionDecl *InstantiateFunctionDeclaration(FunctionTemplateDecl *FTD, const TemplateArgumentList *Args, SourceLocation Loc); diff --git a/clang/include/clang/Sema/SemaConcept.h b/clang/include/clang/Sema/SemaConcept.h new file mode 100644 index 0000000000000..acd1e604211a1 --- /dev/null +++ b/clang/include/clang/Sema/SemaConcept.h @@ -0,0 +1,145 @@ +//===-- SemaConcept.h - Semantic Analysis for Constraints and Concepts ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +// This file provides semantic analysis for C++ constraints and concepts. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_SEMA_SEMACONCEPT_H +#define LLVM_CLANG_SEMA_SEMACONCEPT_H +#include "clang/AST/ASTContext.h" +#include "clang/AST/Expr.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +namespace clang { +class Sema; + +struct AtomicConstraint { + const Expr *ConstraintExpr; + Optional> ParameterMapping; + + AtomicConstraint(Sema &S, const Expr *ConstraintExpr) : + ConstraintExpr(ConstraintExpr) { }; + + bool hasMatchingParameterMapping(ASTContext &C, + const AtomicConstraint &Other) const { + if (!ParameterMapping != !Other.ParameterMapping) + return false; + if (!ParameterMapping) + return true; + if (ParameterMapping->size() != Other.ParameterMapping->size()) + return false; + + for (unsigned I = 0, S = ParameterMapping->size(); I < S; ++I) + if (!C.getCanonicalTemplateArgument((*ParameterMapping)[I].getArgument()) + .structurallyEquals(C.getCanonicalTemplateArgument( + (*Other.ParameterMapping)[I].getArgument()))) + return false; + return true; + } + + bool subsumes(ASTContext &C, const AtomicConstraint &Other) const { + // C++ [temp.constr.order] p2 + // - an atomic constraint A subsumes another atomic constraint B + // if and only if the A and B are identical [...] + // + // C++ [temp.constr.atomic] p2 + // Two atomic constraints are identical if they are formed from the + // same expression and the targets of the parameter mappings are + // equivalent according to the rules for expressions [...] + + // We do not actually substitute the parameter mappings into the + // constraint expressions, therefore the constraint expressions are + // the originals, and comparing them will suffice. + if (ConstraintExpr != Other.ConstraintExpr) + return false; + + // Check that the parameter lists are identical + return hasMatchingParameterMapping(C, Other); + } +}; + +/// \brief A normalized constraint, as defined in C++ [temp.constr.normal], is +/// either an atomic constraint, a conjunction of normalized constraints or a +/// disjunction of normalized constraints. 
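For example, per C++ [temp.constr.normal] (concept names illustrative):

  // requires C1<T> && (C2<T> || C3<T>)   normalizes to
  //   Conjunction(Atomic(C1<T>), Disjunction(Atomic(C2<T>), Atomic(C3<T>)))
  // and subsumption ([temp.constr.order]) is decided over this tree.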
+struct NormalizedConstraint { + friend class Sema; + + enum CompoundConstraintKind { CCK_Conjunction, CCK_Disjunction }; + + using CompoundConstraint = llvm::PointerIntPair< + std::pair *, 1, + CompoundConstraintKind>; + + llvm::PointerUnion Constraint; + + NormalizedConstraint(AtomicConstraint *C): Constraint{C} { }; + NormalizedConstraint(ASTContext &C, NormalizedConstraint LHS, + NormalizedConstraint RHS, CompoundConstraintKind Kind) + : Constraint{CompoundConstraint{ + new (C) std::pair{ + std::move(LHS), std::move(RHS)}, Kind}} { }; + + NormalizedConstraint(ASTContext &C, const NormalizedConstraint &Other) { + if (Other.isAtomic()) { + Constraint = new (C) AtomicConstraint(*Other.getAtomicConstraint()); + } else { + Constraint = CompoundConstraint( + new (C) std::pair{ + NormalizedConstraint(C, Other.getLHS()), + NormalizedConstraint(C, Other.getRHS())}, + Other.getCompoundKind()); + } + } + NormalizedConstraint(NormalizedConstraint &&Other): + Constraint(Other.Constraint) { + Other.Constraint = nullptr; + } + NormalizedConstraint &operator=(const NormalizedConstraint &Other) = delete; + NormalizedConstraint &operator=(NormalizedConstraint &&Other) { + if (&Other != this) { + NormalizedConstraint Temp(std::move(Other)); + std::swap(Constraint, Temp.Constraint); + } + return *this; + } + + CompoundConstraintKind getCompoundKind() const { + assert(!isAtomic() && "getCompoundKind called on atomic constraint."); + return Constraint.get().getInt(); + } + + bool isAtomic() const { return Constraint.is(); } + + NormalizedConstraint &getLHS() const { + assert(!isAtomic() && "getLHS called on atomic constraint."); + return Constraint.get().getPointer()->first; + } + + NormalizedConstraint &getRHS() const { + assert(!isAtomic() && "getRHS called on atomic constraint."); + return Constraint.get().getPointer()->second; + } + + AtomicConstraint *getAtomicConstraint() const { + assert(isAtomic() && + "getAtomicConstraint called on non-atomic constraint."); + return Constraint.get(); + } + +private: + static Optional + fromConstraintExprs(Sema &S, NamedDecl *D, ArrayRef E); + static Optional + fromConstraintExpr(Sema &S, NamedDecl *D, const Expr *E); +}; + +} // clang + +#endif //LLVM_CLANG_SEMA_SEMACONCEPT_H diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 567d2bf7d228d..f6c3aa1a3c1d2 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -3279,10 +3279,12 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) { TypeSourceInfo *TInfo; SourceLocation ToInnerLocStart, ToEndLoc; NestedNameSpecifierLoc ToQualifierLoc; + Expr *TrailingRequiresClause; if (auto Imp = importSeq( FromTy, D->getTypeSourceInfo(), D->getInnerLocStart(), - D->getQualifierLoc(), D->getEndLoc())) - std::tie(T, TInfo, ToInnerLocStart, ToQualifierLoc, ToEndLoc) = *Imp; + D->getQualifierLoc(), D->getEndLoc(), D->getTrailingRequiresClause())) + std::tie(T, TInfo, ToInnerLocStart, ToQualifierLoc, ToEndLoc, + TrailingRequiresClause) = *Imp; else return Imp.takeError(); @@ -3311,7 +3313,10 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) { ExplicitSpecifier( ExplicitExpr, FromConstructor->getExplicitSpecifier().getKind()), - D->isInlineSpecified(), D->isImplicit(), D->getConstexprKind())) + D->isInlineSpecified(), D->isImplicit(), D->getConstexprKind(), + InheritedConstructor(), // FIXME: Properly import inherited + // constructor info + TrailingRequiresClause)) return ToFunction; } else if (CXXDestructorDecl *FromDtor = dyn_cast(D)) { @@ -3329,7 
+3334,7 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) { if (GetImportedOrCreateDecl( ToFunction, D, Importer.getToContext(), cast(DC), ToInnerLocStart, NameInfo, T, TInfo, D->isInlineSpecified(), - D->isImplicit(), D->getConstexprKind())) + D->isImplicit(), D->getConstexprKind(), TrailingRequiresClause)) return ToFunction; CXXDestructorDecl *ToDtor = cast(ToFunction); @@ -3349,20 +3354,21 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) { ToInnerLocStart, NameInfo, T, TInfo, D->isInlineSpecified(), ExplicitSpecifier(ExplicitExpr, FromConversion->getExplicitSpecifier().getKind()), - D->getConstexprKind(), SourceLocation())) + D->getConstexprKind(), SourceLocation(), TrailingRequiresClause)) return ToFunction; } else if (auto *Method = dyn_cast(D)) { if (GetImportedOrCreateDecl( ToFunction, D, Importer.getToContext(), cast(DC), ToInnerLocStart, NameInfo, T, TInfo, Method->getStorageClass(), Method->isInlineSpecified(), D->getConstexprKind(), - SourceLocation())) + SourceLocation(), TrailingRequiresClause)) return ToFunction; } else { if (GetImportedOrCreateDecl( ToFunction, D, Importer.getToContext(), DC, ToInnerLocStart, NameInfo, T, TInfo, D->getStorageClass(), D->isInlineSpecified(), - D->hasWrittenPrototype(), D->getConstexprKind())) + D->hasWrittenPrototype(), D->getConstexprKind(), + TrailingRequiresClause)) return ToFunction; } diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 6cfd4c2a2a218..be59d88b73f13 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -1839,23 +1839,27 @@ void DeclaratorDecl::setQualifierInfo(NestedNameSpecifierLoc QualifierLoc) { } // Set qualifier info. getExtInfo()->QualifierLoc = QualifierLoc; - } else { + } else if (hasExtInfo()) { // Here Qualifier == 0, i.e., we are removing the qualifier (if any). - if (hasExtInfo()) { - if (getExtInfo()->NumTemplParamLists == 0) { - // Save type source info pointer. - TypeSourceInfo *savedTInfo = getExtInfo()->TInfo; - // Deallocate the extended decl info. - getASTContext().Deallocate(getExtInfo()); - // Restore savedTInfo into (non-extended) decl info. - DeclInfo = savedTInfo; - } - else - getExtInfo()->QualifierLoc = QualifierLoc; - } + getExtInfo()->QualifierLoc = QualifierLoc; } } +void DeclaratorDecl::setTrailingRequiresClause(Expr *TrailingRequiresClause) { + assert(TrailingRequiresClause); + // Make sure the extended decl info is allocated. + if (!hasExtInfo()) { + // Save (non-extended) type source info pointer. + auto *savedTInfo = DeclInfo.get(); + // Allocate external info struct. + DeclInfo = new (getASTContext()) ExtInfo; + // Restore savedTInfo into (extended) decl info. + getExtInfo()->TInfo = savedTInfo; + } + // Set requires clause info. 
+ getExtInfo()->TrailingRequiresClause = TrailingRequiresClause; +} + void DeclaratorDecl::setTemplateParameterListsInfo( ASTContext &Context, ArrayRef TPLists) { assert(!TPLists.empty()); @@ -2777,7 +2781,8 @@ FunctionDecl::FunctionDecl(Kind DK, ASTContext &C, DeclContext *DC, const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, StorageClass S, bool isInlineSpecified, - ConstexprSpecKind ConstexprKind) + ConstexprSpecKind ConstexprKind, + Expr *TrailingRequiresClause) : DeclaratorDecl(DK, DC, NameInfo.getLoc(), NameInfo.getName(), T, TInfo, StartLoc), DeclContext(DK), redeclarable_base(C), Body(), ODRHash(0), @@ -2807,6 +2812,8 @@ FunctionDecl::FunctionDecl(Kind DK, ASTContext &C, DeclContext *DC, FunctionDeclBits.IsMultiVersion = false; FunctionDeclBits.IsCopyDeductionCandidate = false; FunctionDeclBits.HasODRHash = false; + if (TrailingRequiresClause) + setTrailingRequiresClause(TrailingRequiresClause); } void FunctionDecl::getNameForDiagnostic( @@ -3872,6 +3879,11 @@ unsigned FunctionDecl::getMemoryFunctionKind() const { case Builtin::BImemcpy: return Builtin::BImemcpy; + case Builtin::BI__builtin_mempcpy: + case Builtin::BI__builtin___mempcpy_chk: + case Builtin::BImempcpy: + return Builtin::BImempcpy; + case Builtin::BI__builtin_memmove: case Builtin::BI__builtin___memmove_chk: case Builtin::BImemmove: @@ -3929,6 +3941,8 @@ unsigned FunctionDecl::getMemoryFunctionKind() const { return Builtin::BImemset; else if (FnInfo->isStr("memcpy")) return Builtin::BImemcpy; + else if (FnInfo->isStr("mempcpy")) + return Builtin::BImempcpy; else if (FnInfo->isStr("memmove")) return Builtin::BImemmove; else if (FnInfo->isStr("memcmp")) @@ -4683,10 +4697,12 @@ FunctionDecl *FunctionDecl::Create(ASTContext &C, DeclContext *DC, QualType T, TypeSourceInfo *TInfo, StorageClass SC, bool isInlineSpecified, bool hasWrittenPrototype, - ConstexprSpecKind ConstexprKind) { + ConstexprSpecKind ConstexprKind, + Expr *TrailingRequiresClause) { FunctionDecl *New = new (C, DC) FunctionDecl(Function, C, DC, StartLoc, NameInfo, T, TInfo, - SC, isInlineSpecified, ConstexprKind); + SC, isInlineSpecified, ConstexprKind, + TrailingRequiresClause); New->setHasWrittenPrototype(hasWrittenPrototype); return New; } @@ -4694,7 +4710,7 @@ FunctionDecl *FunctionDecl::Create(ASTContext &C, DeclContext *DC, FunctionDecl *FunctionDecl::CreateDeserialized(ASTContext &C, unsigned ID) { return new (C, ID) FunctionDecl(Function, C, nullptr, SourceLocation(), DeclarationNameInfo(), QualType(), nullptr, - SC_None, false, CSK_unspecified); + SC_None, false, CSK_unspecified, nullptr); } BlockDecl *BlockDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L) { diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp index caa60408b5b67..bc75c4e544d28 100644 --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -2041,16 +2041,19 @@ CXXMethodDecl *CXXMethodDecl::Create(ASTContext &C, CXXRecordDecl *RD, QualType T, TypeSourceInfo *TInfo, StorageClass SC, bool isInline, ConstexprSpecKind ConstexprKind, - SourceLocation EndLocation) { + SourceLocation EndLocation, + Expr *TrailingRequiresClause) { return new (C, RD) CXXMethodDecl(CXXMethod, C, RD, StartLoc, NameInfo, T, TInfo, SC, - isInline, ConstexprKind, EndLocation); + isInline, ConstexprKind, EndLocation, + TrailingRequiresClause); } CXXMethodDecl *CXXMethodDecl::CreateDeserialized(ASTContext &C, unsigned ID) { return new (C, ID) CXXMethodDecl( CXXMethod, C, nullptr, SourceLocation(), DeclarationNameInfo(), - QualType(), nullptr, SC_None, 
false, CSK_unspecified, SourceLocation()); + QualType(), nullptr, SC_None, false, CSK_unspecified, SourceLocation(), + nullptr); } CXXMethodDecl *CXXMethodDecl::getDevirtualizedMethod(const Expr *Base, @@ -2431,9 +2434,11 @@ CXXConstructorDecl::CXXConstructorDecl( ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc, const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, ExplicitSpecifier ES, bool isInline, bool isImplicitlyDeclared, - ConstexprSpecKind ConstexprKind, InheritedConstructor Inherited) + ConstexprSpecKind ConstexprKind, InheritedConstructor Inherited, + Expr *TrailingRequiresClause) : CXXMethodDecl(CXXConstructor, C, RD, StartLoc, NameInfo, T, TInfo, - SC_None, isInline, ConstexprKind, SourceLocation()) { + SC_None, isInline, ConstexprKind, SourceLocation(), + TrailingRequiresClause) { setNumCtorInitializers(0); setInheritingConstructor(static_cast(Inherited)); setImplicit(isImplicitlyDeclared); @@ -2457,7 +2462,7 @@ CXXConstructorDecl *CXXConstructorDecl::CreateDeserialized(ASTContext &C, auto *Result = new (C, ID, Extra) CXXConstructorDecl(C, nullptr, SourceLocation(), DeclarationNameInfo(), QualType(), nullptr, ExplicitSpecifier(), false, false, - CSK_unspecified, InheritedConstructor()); + CSK_unspecified, InheritedConstructor(), nullptr); Result->setInheritingConstructor(isInheritingConstructor); Result->CXXConstructorDeclBits.HasTrailingExplicitSpecifier = hasTraillingExplicit; @@ -2469,7 +2474,8 @@ CXXConstructorDecl *CXXConstructorDecl::Create( ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc, const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, ExplicitSpecifier ES, bool isInline, bool isImplicitlyDeclared, - ConstexprSpecKind ConstexprKind, InheritedConstructor Inherited) { + ConstexprSpecKind ConstexprKind, InheritedConstructor Inherited, + Expr *TrailingRequiresClause) { assert(NameInfo.getName().getNameKind() == DeclarationName::CXXConstructorName && "Name must refer to a constructor"); @@ -2478,7 +2484,8 @@ CXXConstructorDecl *CXXConstructorDecl::Create( Inherited ? 1 : 0, ES.getExpr() ? 
1 : 0); return new (C, RD, Extra) CXXConstructorDecl(C, RD, StartLoc, NameInfo, T, TInfo, ES, isInline, - isImplicitlyDeclared, ConstexprKind, Inherited); + isImplicitlyDeclared, ConstexprKind, Inherited, + TrailingRequiresClause); } CXXConstructorDecl::init_const_iterator CXXConstructorDecl::init_begin() const { @@ -2599,19 +2606,22 @@ CXXDestructorDecl * CXXDestructorDecl::CreateDeserialized(ASTContext &C, unsigned ID) { return new (C, ID) CXXDestructorDecl(C, nullptr, SourceLocation(), DeclarationNameInfo(), - QualType(), nullptr, false, false, CSK_unspecified); + QualType(), nullptr, false, false, CSK_unspecified, + nullptr); } CXXDestructorDecl *CXXDestructorDecl::Create( ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc, const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, - bool isInline, bool isImplicitlyDeclared, ConstexprSpecKind ConstexprKind) { + bool isInline, bool isImplicitlyDeclared, ConstexprSpecKind ConstexprKind, + Expr *TrailingRequiresClause) { assert(NameInfo.getName().getNameKind() == DeclarationName::CXXDestructorName && "Name must refer to a destructor"); return new (C, RD) CXXDestructorDecl(C, RD, StartLoc, NameInfo, T, TInfo, isInline, - isImplicitlyDeclared, ConstexprKind); + isImplicitlyDeclared, ConstexprKind, + TrailingRequiresClause); } void CXXDestructorDecl::setOperatorDelete(FunctionDecl *OD, Expr *ThisArg) { @@ -2630,20 +2640,20 @@ CXXConversionDecl * CXXConversionDecl::CreateDeserialized(ASTContext &C, unsigned ID) { return new (C, ID) CXXConversionDecl( C, nullptr, SourceLocation(), DeclarationNameInfo(), QualType(), nullptr, - false, ExplicitSpecifier(), CSK_unspecified, SourceLocation()); + false, ExplicitSpecifier(), CSK_unspecified, SourceLocation(), nullptr); } CXXConversionDecl *CXXConversionDecl::Create( ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc, const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, bool isInline, ExplicitSpecifier ES, ConstexprSpecKind ConstexprKind, - SourceLocation EndLocation) { + SourceLocation EndLocation, Expr *TrailingRequiresClause) { assert(NameInfo.getName().getNameKind() == DeclarationName::CXXConversionFunctionName && "Name must refer to a conversion function"); return new (C, RD) CXXConversionDecl(C, RD, StartLoc, NameInfo, T, TInfo, isInline, ES, - ConstexprKind, EndLocation); + ConstexprKind, EndLocation, TrailingRequiresClause); } bool CXXConversionDecl::isLambdaToBlockPointerConversion() const { diff --git a/clang/lib/AST/DeclPrinter.cpp b/clang/lib/AST/DeclPrinter.cpp index 0702256d7a2be..50aab3c080954 100644 --- a/clang/lib/AST/DeclPrinter.cpp +++ b/clang/lib/AST/DeclPrinter.cpp @@ -746,6 +746,11 @@ void DeclPrinter::VisitFunctionDecl(FunctionDecl *D) { Proto.clear(); } Out << Proto; + + if (Expr *TrailingRequiresClause = D->getTrailingRequiresClause()) { + Out << " requires "; + TrailingRequiresClause->printPretty(Out, nullptr, SubPolicy, Indentation); + } } else { Ty.print(Out, Policy, Proto); } diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp index 23734396b7694..59fa7faad927d 100755 --- a/clang/lib/AST/DeclTemplate.cpp +++ b/clang/lib/AST/DeclTemplate.cpp @@ -171,13 +171,18 @@ void TemplateDecl::anchor() {} void TemplateDecl:: getAssociatedConstraints(llvm::SmallVectorImpl &AC) const { - // TODO: Concepts: Append function trailing requires clause. 
TemplateParams->getAssociatedConstraints(AC); + if (auto *FD = dyn_cast_or_null(getTemplatedDecl())) + if (const Expr *TRC = FD->getTrailingRequiresClause()) + AC.push_back(TRC); } bool TemplateDecl::hasAssociatedConstraints() const { - // TODO: Concepts: Regard function trailing requires clause. - return TemplateParams->hasAssociatedConstraints(); + if (TemplateParams->hasAssociatedConstraints()) + return true; + if (auto *FD = dyn_cast_or_null(getTemplatedDecl())) + return FD->getTrailingRequiresClause(); + return false; } //===----------------------------------------------------------------------===// diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 1877d4a5ef70b..bc0ffb7fa440a 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -316,7 +316,8 @@ bool PPCTargetInfo::initFeatureMap( .Case("pwr8", true) .Default(false); - Features["spe"] = llvm::StringSwitch(CPU) + Features["spe"] = getTriple().getSubArch() == llvm::Triple::PPCSubArch_spe || + llvm::StringSwitch(CPU) .Case("8548", true) .Case("e500", true) .Default(false); diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 3076025fc5b2b..270aa7ff91815 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -87,8 +87,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { // Note: GCC recognizes the following additional cpus: // 401, 403, 405, 405fp, 440fp, 464, 464fp, 476, 476fp, 505, 740, 801, - // 821, 823, 8540, 8548, e300c2, e300c3, e500mc64, e6500, 860, cell, - // titan, rs64. + // 821, 823, 8540, e300c2, e300c3, e500mc64, e6500, 860, cell, titan, rs64. bool isValidCPUName(StringRef Name) const override; void fillValidCPUList(SmallVectorImpl &Values) const override; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index b73cf1c6d35eb..e2c5f1b42f854 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2500,7 +2500,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(nullptr); } case Builtin::BImemcpy: - case Builtin::BI__builtin_memcpy: { + case Builtin::BI__builtin_memcpy: + case Builtin::BImempcpy: + case Builtin::BI__builtin_mempcpy: { Address Dest = EmitPointerWithAlignment(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = EmitScalarExpr(E->getArg(2)); @@ -2509,7 +2511,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD, 1); Builder.CreateMemCpy(Dest, Src, SizeVal, false); - return RValue::get(Dest.getPointer()); + if (BuiltinID == Builtin::BImempcpy || + BuiltinID == Builtin::BI__builtin_mempcpy) + return RValue::get(Builder.CreateInBoundsGEP(Dest.getPointer(), SizeVal)); + else + return RValue::get(Dest.getPointer()); } case Builtin::BI__builtin_char_memchr: @@ -4330,9 +4336,29 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(V); } - // See if we have a target specific builtin that needs to be lowered. - if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) - return RValue::get(V); + // Some target-specific builtins can have aggregate return values, e.g. + // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force + // ReturnValue to be non-null, so that the target-specific emission code can + // always just emit into it. 
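+  // (Illustrative only: in ACLE-style source this situation arises from,
+  // e.g., `uint32x4x2_t Pair = vld2q_u32(Ptr);`, where the intrinsic lowers
+  // to __builtin_arm_mve_vld2q_u32 and the struct-of-two-vectors result is
+  // TEK_Aggregate rather than TEK_Scalar.)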
+ TypeEvaluationKind EvalKind = getEvaluationKind(E->getType()); + if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) { + Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp"); + ReturnValue = ReturnValueSlot(DestPtr, false); + } + + // Now see if we can emit a target-specific builtin. + if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) { + switch (EvalKind) { + case TEK_Scalar: + return RValue::get(V); + case TEK_Aggregate: + return RValue::getAggregate(ReturnValue.getValue(), + ReturnValue.isVolatile()); + case TEK_Complex: + llvm_unreachable("No current target builtin returns complex"); + } + llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr"); + } ErrorUnsupported(E, "builtin function"); diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp index 479cd8ec77cec..a27b6d4ed6374 100644 --- a/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -1236,6 +1236,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { // The first Interface we find may be a @class, // which should only be treated as the source of // truth in the absence of a true declaration. + assert(OID && "Failed to find ObjCInterfaceDecl"); const ObjCInterfaceDecl *OIDDef = OID->getDefinition(); if (OIDDef != nullptr) OID = OIDDef; @@ -3036,6 +3037,7 @@ llvm::Value *CGObjCGNU::GenerateProtocolRef(CodeGenFunction &CGF, llvm::Constant *&protocol = ExistingProtocols[PD->getNameAsString()]; if (!protocol) GenerateProtocol(PD); + assert(protocol && "Unknown protocol"); llvm::Type *T = CGM.getTypes().ConvertType(CGM.getContext().getObjCProtoType()); return CGF.Builder.CreateBitCast(protocol, llvm::PointerType::getUnqual(T)); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 11ba2cd9f11d9..a92aa37358d11 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -733,10 +733,6 @@ enum OpenMPRTLFunction { OMPRTL__tgt_target_teams_nowait, // Call to void __tgt_register_requires(int64_t flags); OMPRTL__tgt_register_requires, - // Call to void __tgt_register_lib(__tgt_bin_desc *desc); - OMPRTL__tgt_register_lib, - // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); - OMPRTL__tgt_unregister_lib, // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); OMPRTL__tgt_target_data_begin, @@ -2478,26 +2474,6 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); break; } - case OMPRTL__tgt_register_lib: { - // Build void __tgt_register_lib(__tgt_bin_desc *desc); - QualType ParamTy = - CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); - llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); - break; - } - case OMPRTL__tgt_unregister_lib: { - // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); - QualType ParamTy = - CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); - llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); - break; - } case OMPRTL__tgt_target_data_begin: { // Build void __tgt_target_data_begin(int64_t device_id, int32_t 
arg_num, // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); @@ -4378,57 +4354,6 @@ QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { return TgtOffloadEntryQTy; } -QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { - // These are the types we need to build: - // struct __tgt_device_image{ - // void *ImageStart; // Pointer to the target code start. - // void *ImageEnd; // Pointer to the target code end. - // // We also add the host entries to the device image, as it may be useful - // // for the target runtime to have access to that information. - // __tgt_offload_entry *EntriesBegin; // Begin of the table with all - // // the entries. - // __tgt_offload_entry *EntriesEnd; // End of the table with all the - // // entries (non inclusive). - // }; - if (TgtDeviceImageQTy.isNull()) { - ASTContext &C = CGM.getContext(); - RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image"); - RD->startDefinition(); - addFieldToRecordDecl(C, RD, C.VoidPtrTy); - addFieldToRecordDecl(C, RD, C.VoidPtrTy); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); - RD->completeDefinition(); - TgtDeviceImageQTy = C.getRecordType(RD); - } - return TgtDeviceImageQTy; -} - -QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { - // struct __tgt_bin_desc{ - // int32_t NumDevices; // Number of devices supported. - // __tgt_device_image *DeviceImages; // Arrays of device images - // // (one per device). - // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the - // // entries. - // __tgt_offload_entry *EntriesEnd; // End of the table with all the - // // entries (non inclusive). - // }; - if (TgtBinaryDescriptorQTy.isNull()) { - ASTContext &C = CGM.getContext(); - RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc"); - RD->startDefinition(); - addFieldToRecordDecl( - C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); - RD->completeDefinition(); - TgtBinaryDescriptorQTy = C.getRecordType(RD); - } - return TgtBinaryDescriptorQTy; -} - namespace { struct PrivateHelpersTy { PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, @@ -11193,6 +11118,7 @@ bool checkContext( case llvm::Triple::wasm64: case llvm::Triple::renderscript32: case llvm::Triple::renderscript64: + case llvm::Triple::ve: return false; } } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index c40308ee74971..8159f5e8b790f 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -439,29 +439,10 @@ class CGOpenMPRuntime { /// // (function or global) /// char *name; // Name of the function or global. /// size_t size; // Size of the entry info (0 if it a function). + /// int32_t flags; + /// int32_t reserved; /// }; QualType TgtOffloadEntryQTy; - /// struct __tgt_device_image{ - /// void *ImageStart; // Pointer to the target code start. - /// void *ImageEnd; // Pointer to the target code end. - /// // We also add the host entries to the device image, as it may be useful - /// // for the target runtime to have access to that information. - /// __tgt_offload_entry *EntriesBegin; // Begin of the table with all - /// // the entries. 
-  ///   __tgt_offload_entry *EntriesEnd; // End of the table with all the
-  ///                                    // entries (non inclusive).
-  /// };
-  QualType TgtDeviceImageQTy;
-  /// struct __tgt_bin_desc{
-  ///   int32_t NumDevices; // Number of devices supported.
-  ///   __tgt_device_image *DeviceImages; // Arrays of device images
-  ///                                     // (one per device).
-  ///   __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
-  ///                                      // entries.
-  ///   __tgt_offload_entry *EntriesEnd; // End of the table with all the
-  ///                                    // entries (non inclusive).
-  /// };
-  QualType TgtBinaryDescriptorQTy;
   /// Entity that registers the offloading constants that were emitted so
   /// far.
   class OffloadEntriesInfoManagerTy {
@@ -717,12 +698,6 @@ class CGOpenMPRuntime {
   /// Returns __tgt_offload_entry type.
   QualType getTgtOffloadEntryQTy();
-  /// Returns __tgt_device_image type.
-  QualType getTgtDeviceImageQTy();
-
-  /// Returns __tgt_bin_desc type.
-  QualType getTgtBinaryDescriptorQTy();
-
   /// Start scanning from statement \a S and emit all target regions
   /// found along the way.
   /// \param S Starting statement.
diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
index 68a57310ad402..ae1d7eaf7089f 100644
--- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
@@ -63,12 +63,13 @@ static void getARMHWDivFeatures(const Driver &D, const Arg *A,
 }
 
 // Handle -mfpu=.
-static void getARMFPUFeatures(const Driver &D, const Arg *A,
+unsigned getARMFPUFeatures(const Driver &D, const Arg *A,
                               const ArgList &Args, StringRef FPU,
                               std::vector &Features) {
   unsigned FPUID = llvm::ARM::parseFPU(FPU);
   if (!llvm::ARM::getFPUFeatures(FPUID, Features))
     D.Diag(clang::diag::err_drv_clang_unsupported) << A->getAsString(Args);
+  return FPUID;
 }
 
 // Decode ARM features from string like +[no]featureA+[no]featureB+...
@@ -388,18 +389,20 @@ void arm::getARMTargetFeatures(const ToolChain &TC,
     checkARMCPUName(D, CPUArg, Args, CPUName, ArchName, ExtensionFeatures,
                     Triple);
   // Honor -mfpu=. ClangAs gives preference to -Wa,-mfpu=.
+  unsigned FPUID = llvm::ARM::FK_INVALID;
   const Arg *FPUArg = Args.getLastArg(options::OPT_mfpu_EQ);
   if (WaFPU) {
     if (FPUArg)
       D.Diag(clang::diag::warn_drv_unused_argument)
           << FPUArg->getAsString(Args);
-    getARMFPUFeatures(D, WaFPU, Args, StringRef(WaFPU->getValue()).substr(6),
-                      Features);
+    (void)getARMFPUFeatures(D, WaFPU, Args, StringRef(WaFPU->getValue()).substr(6),
+                            Features);
   } else if (FPUArg) {
-    getARMFPUFeatures(D, FPUArg, Args, FPUArg->getValue(), Features);
+    FPUID = getARMFPUFeatures(D, FPUArg, Args, FPUArg->getValue(), Features);
   } else if (Triple.isAndroid() && getARMSubArchVersionNumber(Triple) >= 7) {
     const char *AndroidFPU = "neon";
-    if (!llvm::ARM::getFPUFeatures(llvm::ARM::parseFPU(AndroidFPU), Features))
+    FPUID = llvm::ARM::parseFPU(AndroidFPU);
+    if (!llvm::ARM::getFPUFeatures(FPUID, Features))
       D.Diag(clang::diag::err_drv_clang_unsupported)
           << std::string("-mfpu=") + AndroidFPU;
   }
@@ -454,21 +457,21 @@ void arm::getARMTargetFeatures(const ToolChain &TC,
   if (ABI == arm::FloatABI::Soft) {
     llvm::ARM::getFPUFeatures(llvm::ARM::FK_NONE, Features);
 
-    // Disable all features relating to hardware FP.
-    // FIXME: Disabling fpregs should be enough all by itself, since all
-    // the other FP features are dependent on it. However
-    // there is currently no easy way to test this in clang, so for
-    // now just be explicit and disable all known dependent features
-    // as well.
-    for (std::string Feature : {
-        "vfp2", "vfp2sp",
-        "vfp3", "vfp3sp", "vfp3d16", "vfp3d16sp",
-        "vfp4", "vfp4sp", "vfp4d16", "vfp4d16sp",
-        "fp-armv8", "fp-armv8sp", "fp-armv8d16", "fp-armv8d16sp",
-        "fullfp16", "neon", "crypto", "dotprod", "fp16fml",
-        "mve", "mve.fp",
-        "fp64", "d32", "fpregs"})
-      Features.push_back(Args.MakeArgString("-" + Feature));
+    // Disable all features relating to hardware FP that are not already
+    // disabled by the above call.
+    Features.insert(Features.end(), {"-neon", "-crypto", "-dotprod", "-fp16fml",
+                                     "-mve", "-mve.fp", "-fpregs"});
+  } else if (FPUID == llvm::ARM::FK_NONE) {
+    // -mfpu=none is *very* similar to -mfloat-abi=soft, except that it should
+    // not disable MVE-I.
+    Features.insert(Features.end(),
+                    {"-neon", "-crypto", "-dotprod", "-fp16fml", "-mve.fp"});
+    // Even though we remove MVE-FP, we still need to check if it was originally
+    // present among the requested extensions, because it implies MVE-I, which
+    // should not be disabled by -mfpu=none.
+    if (!llvm::is_contained(Features, "+mve") &&
+        !llvm::is_contained(Features, "+mve.fp"))
+      Features.emplace_back("-fpregs");
   }
 
   // En/disable crc code generation.
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 5881852b1424a..9a852141c6eea 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -1053,11 +1053,9 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
 
   auto StatusOrErr =
       Result.convertFromString(Str, APFloat::rmNearestTiesToEven);
-  if (!StatusOrErr) {
-    assert(false && "Invalid floating point representation");
-    return APFloat::opInvalidOp;
-  }
-  return *StatusOrErr;
+  assert(StatusOrErr && "Invalid floating point representation");
+  return !errorToBool(StatusOrErr.takeError()) ? *StatusOrErr
+                                               : APFloat::opInvalidOp;
 }
 
 static inline bool IsExponentPart(char c) {
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 908fbf76d77fb..695c485649a40 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -2014,6 +2014,9 @@ Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS,
     return nullptr;
   }
 
+  if (Tok.is(tok::kw_requires))
+    ParseTrailingRequiresClause(D);
+
   // Save late-parsed attributes for now; they need to be parsed in the
   // appropriate function scope after the function Decl has been constructed.
   // These will be parsed in ParseFunctionDefinition or ParseLexedAttrList.
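
The init-declarator grammar these ParseDeclGroup changes implement is easiest to see in source form. A minimal sketch of the declarations that now parse (illustrative C++2a user code, not part of the patch; Small, f, and g are invented names):

template <typename T>
concept Small = sizeof(T) <= sizeof(int);

template <typename T>
void f(T) requires Small<T>;              // declarator followed by requires-clause

template <typename T>
T g(T x) requires Small<T> { return x; }  // likewise on a function definition
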
@@ -2165,6 +2168,12 @@ Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS,
     ParseDeclarator(D);
     if (!D.isInvalidType()) {
+      // C++2a [dcl.decl]p1
+      //   init-declarator:
+      //     declarator initializer[opt]
+      //     declarator requires-clause
+      if (Tok.is(tok::kw_requires))
+        ParseTrailingRequiresClause(D);
       Decl *ThisDecl = ParseDeclarationAfterDeclarator(D);
       D.complete(ThisDecl);
       if (ThisDecl)
@@ -6033,6 +6042,22 @@ void Parser::ParseDirectDeclarator(Declarator &D) {
       PrototypeScope.Exit();
     } else if (Tok.is(tok::l_square)) {
       ParseBracketDeclarator(D);
+    } else if (Tok.is(tok::kw_requires) && D.hasGroupingParens()) {
+      // This declarator is declaring a function, but the requires clause is
+      // in the wrong place:
+      //   void (f() requires true);
+      // instead of
+      //   void f() requires true;
+      // or
+      //   void (f()) requires true;
+      Diag(Tok, diag::err_requires_clause_inside_parens);
+      ConsumeToken();
+      ExprResult TrailingRequiresClause = Actions.CorrectDelayedTyposInExpr(
+          ParseConstraintLogicalOrExpression(/*IsTrailingRequiresClause=*/true));
+      if (TrailingRequiresClause.isUsable() && D.isFunctionDeclarator() &&
+          !D.hasTrailingRequiresClause())
+        // We're already ill-formed if we got here but we'll accept it anyway.
+        D.setTrailingRequiresClause(TrailingRequiresClause.get());
     } else {
       break;
     }
@@ -6213,6 +6238,47 @@ void Parser::ParseParenDeclarator(Declarator &D) {
   PrototypeScope.Exit();
 }
 
+void Parser::InitCXXThisScopeForDeclaratorIfRelevant(
+    const Declarator &D, const DeclSpec &DS,
+    llvm::Optional &ThisScope) {
+  // C++11 [expr.prim.general]p3:
+  //   If a declaration declares a member function or member function
+  //   template of a class X, the expression this is a prvalue of type
+  //   "pointer to cv-qualifier-seq X" between the optional cv-qualifier-seq
+  //   and the end of the function-definition, member-declarator, or
+  //   declarator.
+  // FIXME: currently, "static" case isn't handled correctly.
+  bool IsCXX11MemberFunction = getLangOpts().CPlusPlus11 &&
+      D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_typedef &&
+      (D.getContext() == DeclaratorContext::MemberContext
+           ? !D.getDeclSpec().isFriendSpecified()
+           : D.getContext() == DeclaratorContext::FileContext &&
+                 D.getCXXScopeSpec().isValid() &&
+                 Actions.CurContext->isRecord());
+  if (!IsCXX11MemberFunction)
+    return;
+
+  Qualifiers Q = Qualifiers::fromCVRUMask(DS.getTypeQualifiers());
+  if (D.getDeclSpec().hasConstexprSpecifier() && !getLangOpts().CPlusPlus14)
+    Q.addConst();
+  // FIXME: Collect C++ address spaces.
+  // If there are multiple different address spaces, the source is invalid.
+  // Carry on using the first addr space for the qualifiers of 'this'.
+  // The diagnostic will be given later while creating the function
+  // prototype for the method.
+  if (getLangOpts().OpenCLCPlusPlus) {
+    for (ParsedAttr &attr : DS.getAttributes()) {
+      LangAS ASIdx = attr.asOpenCLLangAS();
+      if (ASIdx != LangAS::Default) {
+        Q.addAddressSpace(ASIdx);
+        break;
+      }
+    }
+  }
+  ThisScope.emplace(Actions, dyn_cast(Actions.CurContext), Q,
+                    IsCXX11MemberFunction);
+}
+
 /// ParseFunctionDeclarator - We are after the identifier and have parsed the
 /// declarator D up to a paren, which indicates that we are parsing function
 /// arguments.
@@ -6226,7 +6292,8 @@ void Parser::ParseParenDeclarator(Declarator &D) {
 ///
 /// For C++, after the parameter-list, it also parses the cv-qualifier-seq[opt],
 /// (C++11) ref-qualifier[opt], exception-specification[opt],
-/// (C++11) attribute-specifier-seq[opt], and (C++11) trailing-return-type[opt].
+/// (C++11) attribute-specifier-seq[opt], (C++11) trailing-return-type[opt] and +/// (C++2a) the trailing requires-clause. /// /// [C++11] exception-specification: /// dynamic-exception-specification @@ -6321,43 +6388,8 @@ void Parser::ParseFunctionDeclarator(Declarator &D, if (ParseRefQualifier(RefQualifierIsLValueRef, RefQualifierLoc)) EndLoc = RefQualifierLoc; - // C++11 [expr.prim.general]p3: - // If a declaration declares a member function or member function - // template of a class X, the expression this is a prvalue of type - // "pointer to cv-qualifier-seq X" between the optional cv-qualifer-seq - // and the end of the function-definition, member-declarator, or - // declarator. - // FIXME: currently, "static" case isn't handled correctly. - bool IsCXX11MemberFunction = - getLangOpts().CPlusPlus11 && - D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_typedef && - (D.getContext() == DeclaratorContext::MemberContext - ? !D.getDeclSpec().isFriendSpecified() - : D.getContext() == DeclaratorContext::FileContext && - D.getCXXScopeSpec().isValid() && - Actions.CurContext->isRecord()); - - Qualifiers Q = Qualifiers::fromCVRUMask(DS.getTypeQualifiers()); - if (D.getDeclSpec().hasConstexprSpecifier() && !getLangOpts().CPlusPlus14) - Q.addConst(); - // FIXME: Collect C++ address spaces. - // If there are multiple different address spaces, the source is invalid. - // Carry on using the first addr space for the qualifiers of 'this'. - // The diagnostic will be given later while creating the function - // prototype for the method. - if (getLangOpts().OpenCLCPlusPlus) { - for (ParsedAttr &attr : DS.getAttributes()) { - LangAS ASIdx = attr.asOpenCLLangAS(); - if (ASIdx != LangAS::Default) { - Q.addAddressSpace(ASIdx); - break; - } - } - } - - Sema::CXXThisScopeRAII ThisScope( - Actions, dyn_cast(Actions.CurContext), Q, - IsCXX11MemberFunction); + llvm::Optional ThisScope; + InitCXXThisScopeForDeclaratorIfRelevant(D, DS, ThisScope); // Parse exception-specification[opt]. bool Delayed = D.isFirstDeclarationOfMember() && @@ -6625,6 +6657,17 @@ void Parser::ParseParameterDeclarationClause( // Parse GNU attributes, if present. MaybeParseGNUAttributes(ParmDeclarator); + if (Tok.is(tok::kw_requires)) { + // User tried to define a requires clause in a parameter declaration, + // which is surely not a function declaration. + // void f(int (*g)(int, int) requires true); + Diag(Tok, + diag::err_requires_clause_on_declarator_not_declaring_a_function); + ConsumeToken(); + Actions.CorrectDelayedTyposInExpr( + ParseConstraintLogicalOrExpression(/*IsTrailingRequiresClause=*/true)); + } + // Remember this parsed parameter in ParamInfo. 
IdentifierInfo *ParmII = ParmDeclarator.getIdentifier(); diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index af3403403c11b..081d4d8b12092 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -2301,6 +2301,7 @@ bool Parser::ParseCXXMemberDeclaratorBeforeInitializer( LateParsedAttrList &LateParsedAttrs) { // member-declarator: // declarator pure-specifier[opt] + // declarator requires-clause // declarator brace-or-equal-initializer[opt] // identifier[opt] ':' constant-expression if (Tok.isNot(tok::colon)) @@ -2314,6 +2315,8 @@ bool Parser::ParseCXXMemberDeclaratorBeforeInitializer( BitfieldSize = ParseConstantExpression(); if (BitfieldSize.isInvalid()) SkipUntil(tok::comma, StopAtSemi | StopBeforeMatch); + } else if (Tok.is(tok::kw_requires)) { + ParseTrailingRequiresClause(DeclaratorInfo); } else { ParseOptionalCXX11VirtSpecifierSeq( VS, getCurrentClass().IsInterface, @@ -2436,6 +2439,7 @@ void Parser::MaybeParseAndDiagnoseDeclSpecAfterCXX11VirtSpecifierSeq( /// /// member-declarator: /// declarator virt-specifier-seq[opt] pure-specifier[opt] +/// [C++2a] declarator requires-clause /// declarator constant-initializer[opt] /// [C++11] declarator brace-or-equal-initializer[opt] /// identifier[opt] ':' constant-expression @@ -2669,6 +2673,7 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS, SmallVector DeclsInGroup; ExprResult BitfieldSize; + ExprResult TrailingRequiresClause; bool ExpectSemi = true; // Parse the first declarator. @@ -3793,6 +3798,62 @@ TypeResult Parser::ParseTrailingReturnType(SourceRange &Range, : DeclaratorContext::TrailingReturnContext); } +/// Parse a requires-clause as part of a function declaration. +void Parser::ParseTrailingRequiresClause(Declarator &D) { + assert(Tok.is(tok::kw_requires) && "expected requires"); + + SourceLocation RequiresKWLoc = ConsumeToken(); + + ExprResult TrailingRequiresClause; + ParseScope ParamScope(this, + Scope::DeclScope | + Scope::FunctionDeclarationScope | + Scope::FunctionPrototypeScope); + + Actions.ActOnStartTrailingRequiresClause(getCurScope(), D); + + llvm::Optional ThisScope; + InitCXXThisScopeForDeclaratorIfRelevant(D, D.getDeclSpec(), ThisScope); + + TrailingRequiresClause = + ParseConstraintLogicalOrExpression(/*IsTrailingRequiresClause=*/true); + + TrailingRequiresClause = + Actions.ActOnFinishTrailingRequiresClause(TrailingRequiresClause); + + if (!D.isDeclarationOfFunction()) { + Diag(RequiresKWLoc, + diag::err_requires_clause_on_declarator_not_declaring_a_function); + return; + } + + if (TrailingRequiresClause.isInvalid()) + SkipUntil({tok::l_brace, tok::arrow, tok::kw_try, tok::comma, tok::colon}, + StopAtSemi | StopBeforeMatch); + else + D.setTrailingRequiresClause(TrailingRequiresClause.get()); + + // Did the user swap the trailing return type and requires clause? 
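+  // (Illustrative: `template <class T> auto f(T) requires true -> int;` is
+  // recovered below as if the conforming order
+  // `template <class T> auto f(T) -> int requires true;` had been written.)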
+ if (D.isFunctionDeclarator() && Tok.is(tok::arrow) && + D.getDeclSpec().getTypeSpecType() == TST_auto) { + SourceLocation ArrowLoc = Tok.getLocation(); + SourceRange Range; + TypeResult TrailingReturnType = + ParseTrailingReturnType(Range, /*MayBeFollowedByDirectInit=*/false); + + if (!TrailingReturnType.isInvalid()) { + Diag(ArrowLoc, + diag::err_requires_clause_must_appear_after_trailing_return) + << Range; + auto &FunctionChunk = D.getFunctionTypeInfo(); + FunctionChunk.HasTrailingReturnType = TrailingReturnType.isUsable(); + FunctionChunk.TrailingReturnType = TrailingReturnType.get(); + } else + SkipUntil({tok::equal, tok::l_brace, tok::arrow, tok::kw_try, tok::comma}, + StopAtSemi | StopBeforeMatch); + } +} + /// We have just started parsing the definition of a new class, /// so push that class onto our stack of classes that is currently /// being parsed. diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 6ef303047db49..d5fc3864599df 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -22,6 +22,7 @@ #include "clang/Parse/Parser.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/ExprCXX.h" #include "clang/Basic/PrettyStackTrace.h" #include "clang/Parse/RAIIObjectsForParser.h" #include "clang/Sema/DeclSpec.h" @@ -145,7 +146,7 @@ Parser::ParseExpressionWithLeadingExtension(SourceLocation ExtLoc) { // Silence extension warnings in the sub-expression ExtensionRAIIObject O(Diags); - LHS = ParseCastExpression(false); + LHS = ParseCastExpression(AnyCastExpr); } if (!LHS.isInvalid()) @@ -169,7 +170,7 @@ ExprResult Parser::ParseAssignmentExpression(TypeCastState isTypeCast) { if (Tok.is(tok::kw_co_yield)) return ParseCoyieldExpression(); - ExprResult LHS = ParseCastExpression(/*isUnaryExpression=*/false, + ExprResult LHS = ParseCastExpression(AnyCastExpr, /*isAddressOfOperand=*/false, isTypeCast); return ParseRHSOfBinaryExpression(LHS, prec::Assignment); @@ -202,7 +203,7 @@ Parser::ParseConstantExpressionInExprEvalContext(TypeCastState isTypeCast) { Sema::ExpressionEvaluationContext::ConstantEvaluated && "Call this function only if your ExpressionEvaluationContext is " "already ConstantEvaluated"); - ExprResult LHS(ParseCastExpression(false, false, isTypeCast)); + ExprResult LHS(ParseCastExpression(AnyCastExpr, false, isTypeCast)); ExprResult Res(ParseRHSOfBinaryExpression(LHS, prec::Conditional)); return Actions.ActOnConstantExpression(Res); } @@ -220,7 +221,7 @@ ExprResult Parser::ParseConstantExpression(TypeCastState isTypeCast) { ExprResult Parser::ParseCaseExpression(SourceLocation CaseLoc) { EnterExpressionEvaluationContext ConstantEvaluated( Actions, Sema::ExpressionEvaluationContext::ConstantEvaluated); - ExprResult LHS(ParseCastExpression(false, false, NotTypeCast)); + ExprResult LHS(ParseCastExpression(AnyCastExpr, false, NotTypeCast)); ExprResult Res(ParseRHSOfBinaryExpression(LHS, prec::Conditional)); return Actions.ActOnCaseExpr(CaseLoc, Res); } @@ -234,13 +235,143 @@ ExprResult Parser::ParseCaseExpression(SourceLocation CaseLoc) { ExprResult Parser::ParseConstraintExpression() { EnterExpressionEvaluationContext ConstantEvaluated( Actions, Sema::ExpressionEvaluationContext::ConstantEvaluated); - ExprResult LHS(ParseCastExpression(/*isUnaryExpression=*/false)); + ExprResult LHS(ParseCastExpression(AnyCastExpr)); ExprResult Res(ParseRHSOfBinaryExpression(LHS, prec::LogicalOr)); - if (Res.isUsable() && !Actions.CheckConstraintExpression(Res.get())) + if (Res.isUsable() && 
!Actions.CheckConstraintExpression(Res.get())) {
+    Actions.CorrectDelayedTyposInExpr(Res);
     return ExprError();
+  }
   return Res;
 }
 
+/// \brief Parse a constraint-logical-and-expression.
+///
+/// \verbatim
+/// C++2a[temp.constr.decl]p1
+///     constraint-logical-and-expression:
+///         primary-expression
+///         constraint-logical-and-expression '&&' primary-expression
+///
+/// \endverbatim
+ExprResult
+Parser::ParseConstraintLogicalAndExpression(bool IsTrailingRequiresClause) {
+  EnterExpressionEvaluationContext ConstantEvaluated(
+      Actions, Sema::ExpressionEvaluationContext::ConstantEvaluated);
+  bool NotPrimaryExpression = false;
+  auto ParsePrimary = [&] () {
+    ExprResult E = ParseCastExpression(PrimaryExprOnly,
+                                       /*isAddressOfOperand=*/false,
+                                       /*isTypeCast=*/NotTypeCast,
+                                       /*isVectorLiteral=*/false,
+                                       &NotPrimaryExpression);
+    if (E.isInvalid())
+      return ExprError();
+    auto RecoverFromNonPrimary = [&] (ExprResult E, bool Note) {
+        E = ParsePostfixExpressionSuffix(E);
+        // Use InclusiveOr, the precedence just above '&&', so that we do not
+        // also consume the following operands of the logical and.
+        E = ParseRHSOfBinaryExpression(E, prec::InclusiveOr);
+        if (!E.isInvalid())
+          Diag(E.get()->getExprLoc(),
+               Note
+               ? diag::note_unparenthesized_non_primary_expr_in_requires_clause
+               : diag::err_unparenthesized_non_primary_expr_in_requires_clause)
+               << FixItHint::CreateInsertion(E.get()->getBeginLoc(), "(")
+               << FixItHint::CreateInsertion(
+                   PP.getLocForEndOfToken(E.get()->getEndLoc()), ")")
+               << E.get()->getSourceRange();
+        return E;
+    };
+
+    if (NotPrimaryExpression ||
+        // Check if the following tokens must be a part of a non-primary
+        // expression
+        getBinOpPrecedence(Tok.getKind(), GreaterThanIsOperator,
+                           /*CPlusPlus11=*/true) > prec::LogicalAnd ||
+        // Postfix operators other than '(' (which will be checked for in
+        // CheckConstraintExpression).
+        Tok.isOneOf(tok::period, tok::plusplus, tok::minusminus) ||
+        (Tok.is(tok::l_square) && !NextToken().is(tok::l_square))) {
+      E = RecoverFromNonPrimary(E, /*Note=*/false);
+      if (E.isInvalid())
+        return ExprError();
+      NotPrimaryExpression = false;
+    }
+    bool PossibleNonPrimary;
+    bool IsConstraintExpr =
+        Actions.CheckConstraintExpression(E.get(), Tok, &PossibleNonPrimary,
+                                          IsTrailingRequiresClause);
+    if (!IsConstraintExpr || PossibleNonPrimary) {
+      // Atomic constraint might be an unparenthesized non-primary expression
+      // (such as a binary operator), in which case we might get here (e.g. in
+      // 'requires 0 + 1 && true' we would now be at '+', and parse and ignore
+      // the rest of the addition expression). Try to parse the rest of it here.
+      if (PossibleNonPrimary)
+        E = RecoverFromNonPrimary(E, /*Note=*/!IsConstraintExpr);
+      Actions.CorrectDelayedTyposInExpr(E);
+      return ExprError();
+    }
+    return E;
+  };
+  ExprResult LHS = ParsePrimary();
+  if (LHS.isInvalid())
+    return ExprError();
+  while (Tok.is(tok::ampamp)) {
+    SourceLocation LogicalAndLoc = ConsumeToken();
+    ExprResult RHS = ParsePrimary();
+    if (RHS.isInvalid()) {
+      Actions.CorrectDelayedTyposInExpr(LHS);
+      return ExprError();
+    }
+    ExprResult Op = Actions.ActOnBinOp(getCurScope(), LogicalAndLoc,
+                                       tok::ampamp, LHS.get(), RHS.get());
+    if (!Op.isUsable()) {
+      Actions.CorrectDelayedTyposInExpr(RHS);
+      Actions.CorrectDelayedTyposInExpr(LHS);
+      return ExprError();
+    }
+    LHS = Op;
+  }
+  return LHS;
+}
+
+/// \brief Parse a constraint-logical-or-expression.
+/// +/// \verbatim +/// C++2a[temp.constr.decl]p1 +/// constraint-logical-or-expression: +/// constraint-logical-and-expression +/// constraint-logical-or-expression '||' +/// constraint-logical-and-expression +/// +/// \endverbatim +ExprResult +Parser::ParseConstraintLogicalOrExpression(bool IsTrailingRequiresClause) { + ExprResult LHS(ParseConstraintLogicalAndExpression(IsTrailingRequiresClause)); + if (!LHS.isUsable()) + return ExprError(); + while (Tok.is(tok::pipepipe)) { + SourceLocation LogicalOrLoc = ConsumeToken(); + ExprResult RHS = + ParseConstraintLogicalAndExpression(IsTrailingRequiresClause); + if (!RHS.isUsable()) { + Actions.CorrectDelayedTyposInExpr(LHS); + return ExprError(); + } + ExprResult Op = Actions.ActOnBinOp(getCurScope(), LogicalOrLoc, + tok::pipepipe, LHS.get(), RHS.get()); + if (!Op.isUsable()) { + Actions.CorrectDelayedTyposInExpr(RHS); + Actions.CorrectDelayedTyposInExpr(LHS); + return ExprError(); + } + LHS = Op; + } + return LHS; +} + bool Parser::isNotExpressionStart() { tok::TokenKind K = Tok.getKind(); if (K == tok::l_brace || K == tok::r_brace || @@ -414,7 +545,7 @@ Parser::ParseRHSOfBinaryExpression(ExprResult LHS, prec::Level MinPrec) { } else if (getLangOpts().CPlusPlus && NextTokPrec <= prec::Conditional) RHS = ParseAssignmentExpression(); else - RHS = ParseCastExpression(false); + RHS = ParseCastExpression(AnyCastExpr); if (RHS.isInvalid()) { // FIXME: Errors generated by the delayed typo correction should be @@ -519,22 +650,24 @@ Parser::ParseRHSOfBinaryExpression(ExprResult LHS, prec::Level MinPrec) { } } -/// Parse a cast-expression, or, if \p isUnaryExpression is true, -/// parse a unary-expression. +/// Parse a cast-expression, unary-expression or primary-expression, based +/// on \p ExprType. /// /// \p isAddressOfOperand exists because an id-expression that is the /// operand of address-of gets special treatment due to member pointers. /// -ExprResult Parser::ParseCastExpression(bool isUnaryExpression, +ExprResult Parser::ParseCastExpression(CastParseKind ParseKind, bool isAddressOfOperand, TypeCastState isTypeCast, - bool isVectorLiteral) { + bool isVectorLiteral, + bool *NotPrimaryExpression) { bool NotCastExpr; - ExprResult Res = ParseCastExpression(isUnaryExpression, + ExprResult Res = ParseCastExpression(ParseKind, isAddressOfOperand, NotCastExpr, isTypeCast, - isVectorLiteral); + isVectorLiteral, + NotPrimaryExpression); if (NotCastExpr) Diag(Tok, diag::err_expected_expression); return Res; @@ -759,11 +892,12 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback { /// '__is_rvalue_expr' /// \endverbatim /// -ExprResult Parser::ParseCastExpression(bool isUnaryExpression, +ExprResult Parser::ParseCastExpression(CastParseKind ParseKind, bool isAddressOfOperand, bool &NotCastExpr, TypeCastState isTypeCast, - bool isVectorLiteral) { + bool isVectorLiteral, + bool *NotPrimaryExpression) { ExprResult Res; tok::TokenKind SavedKind = Tok.getKind(); auto SavedType = PreferredType; @@ -782,11 +916,21 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, // ParsePostfixExpressionSuffix. switch (SavedKind) { case tok::l_paren: { - // If this expression is limited to being a unary-expression, the parent can + // If this expression is limited to being a unary-expression, the paren can // not start a cast expression. - ParenParseOption ParenExprType = - (isUnaryExpression && !getLangOpts().CPlusPlus) ? 
CompoundLiteral - : CastExpr; + ParenParseOption ParenExprType; + switch (ParseKind) { + case CastParseKind::UnaryExprOnly: + if (!getLangOpts().CPlusPlus) + ParenExprType = CompoundLiteral; + LLVM_FALLTHROUGH; + case CastParseKind::AnyCastExpr: + ParenExprType = ParenParseOption::CastExpr; + break; + case CastParseKind::PrimaryExprOnly: + ParenExprType = FoldExpr; + break; + } ParsedType CastTy; SourceLocation RParenLoc; Res = ParseParenExpression(ParenExprType, false/*stopIfCastExr*/, @@ -861,8 +1005,9 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, if (TryAnnotateTypeOrScopeToken()) return ExprError(); assert(Tok.isNot(tok::kw_decltype) && Tok.isNot(tok::kw___super)); - return ParseCastExpression(isUnaryExpression, isAddressOfOperand); - + return ParseCastExpression(ParseKind, isAddressOfOperand, isTypeCast, + isVectorLiteral, NotPrimaryExpression); + case tok::identifier: { // primary-expression: identifier // unqualified-id: identifier // constant: enumeration-constant @@ -949,8 +1094,9 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, = RevertibleTypeTraits.find(II); if (Known != RevertibleTypeTraits.end()) { Tok.setKind(Known->second); - return ParseCastExpression(isUnaryExpression, isAddressOfOperand, - NotCastExpr, isTypeCast); + return ParseCastExpression(ParseKind, isAddressOfOperand, + NotCastExpr, isTypeCast, + isVectorLiteral, NotPrimaryExpression); } } @@ -961,7 +1107,10 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, if (TryAnnotateTypeOrScopeToken()) return ExprError(); if (!Tok.is(tok::identifier)) - return ParseCastExpression(isUnaryExpression, isAddressOfOperand); + return ParseCastExpression(ParseKind, isAddressOfOperand, + NotCastExpr, isTypeCast, + isVectorLiteral, + NotPrimaryExpression); } } @@ -1076,8 +1225,10 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, Tok.is(tok::r_paren) ? nullptr : &Replacement); if (!Res.isInvalid() && Res.isUnset()) { UnconsumeToken(Replacement); - return ParseCastExpression(isUnaryExpression, isAddressOfOperand, - NotCastExpr, isTypeCast); + return ParseCastExpression(ParseKind, isAddressOfOperand, + NotCastExpr, isTypeCast, + /*isVectorLiteral=*/false, + NotPrimaryExpression); } if (!Res.isInvalid() && Tok.is(tok::less)) checkPotentialAngleBracket(Res); @@ -1122,12 +1273,16 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, case tok::kw___builtin_FILE: case tok::kw___builtin_FUNCTION: case tok::kw___builtin_LINE: + if (NotPrimaryExpression) + *NotPrimaryExpression = true; return ParseBuiltinPrimaryExpression(); case tok::kw___null: return Actions.ActOnGNUNullExpr(ConsumeToken()); case tok::plusplus: // unary-expression: '++' unary-expression [C99] case tok::minusminus: { // unary-expression: '--' unary-expression [C99] + if (NotPrimaryExpression) + *NotPrimaryExpression = true; // C++ [expr.unary] has: // unary-expression: // ++ cast-expression @@ -1140,7 +1295,8 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, // One special case is implicitly handled here: if the preceding tokens are // an ambiguous cast expression, such as "(T())++", then we recurse to // determine whether the '++' is prefix or postfix. - Res = ParseCastExpression(!getLangOpts().CPlusPlus, + Res = ParseCastExpression(getLangOpts().CPlusPlus ? 
+ UnaryExprOnly : AnyCastExpr, /*isAddressOfOperand*/false, NotCastExpr, NotTypeCast); if (NotCastExpr) { @@ -1156,10 +1312,12 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, return Res; } case tok::amp: { // unary-expression: '&' cast-expression + if (NotPrimaryExpression) + *NotPrimaryExpression = true; // Special treatment because of member pointers SourceLocation SavedLoc = ConsumeToken(); PreferredType.enterUnary(Actions, Tok.getLocation(), tok::amp, SavedLoc); - Res = ParseCastExpression(false, true); + Res = ParseCastExpression(AnyCastExpr, true); if (!Res.isInvalid()) Res = Actions.ActOnUnaryOp(getCurScope(), SavedLoc, SavedKind, Res.get()); return Res; @@ -1172,17 +1330,21 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, case tok::exclaim: // unary-expression: '!' cast-expression case tok::kw___real: // unary-expression: '__real' cast-expression [GNU] case tok::kw___imag: { // unary-expression: '__imag' cast-expression [GNU] + if (NotPrimaryExpression) + *NotPrimaryExpression = true; SourceLocation SavedLoc = ConsumeToken(); PreferredType.enterUnary(Actions, Tok.getLocation(), SavedKind, SavedLoc); - Res = ParseCastExpression(false); + Res = ParseCastExpression(AnyCastExpr); if (!Res.isInvalid()) Res = Actions.ActOnUnaryOp(getCurScope(), SavedLoc, SavedKind, Res.get()); return Res; } case tok::kw_co_await: { // unary-expression: 'co_await' cast-expression + if (NotPrimaryExpression) + *NotPrimaryExpression = true; SourceLocation CoawaitLoc = ConsumeToken(); - Res = ParseCastExpression(false); + Res = ParseCastExpression(AnyCastExpr); if (!Res.isInvalid()) Res = Actions.ActOnCoawaitExpr(getCurScope(), CoawaitLoc, Res.get()); return Res; @@ -1190,9 +1352,11 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, case tok::kw___extension__:{//unary-expression:'__extension__' cast-expr [GNU] // __extension__ silences extension warnings in the subexpression. + if (NotPrimaryExpression) + *NotPrimaryExpression = true; ExtensionRAIIObject O(Diags); // Use RAII to do this. 
SourceLocation SavedLoc = ConsumeToken(); - Res = ParseCastExpression(false); + Res = ParseCastExpression(AnyCastExpr); if (!Res.isInvalid()) Res = Actions.ActOnUnaryOp(getCurScope(), SavedLoc, SavedKind, Res.get()); return Res; @@ -1209,8 +1373,12 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, case tok::kw_vec_step: // unary-expression: OpenCL 'vec_step' expression // unary-expression: '__builtin_omp_required_simd_align' '(' type-name ')' case tok::kw___builtin_omp_required_simd_align: + if (NotPrimaryExpression) + *NotPrimaryExpression = true; return ParseUnaryExprOrTypeTraitExpression(); case tok::ampamp: { // unary-expression: '&&' identifier + if (NotPrimaryExpression) + *NotPrimaryExpression = true; SourceLocation AmpAmpLoc = ConsumeToken(); if (Tok.isNot(tok::identifier)) return ExprError(Diag(Tok, diag::err_expected) << tok::identifier); @@ -1229,18 +1397,26 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, case tok::kw_dynamic_cast: case tok::kw_reinterpret_cast: case tok::kw_static_cast: + if (NotPrimaryExpression) + *NotPrimaryExpression = true; Res = ParseCXXCasts(); break; case tok::kw___builtin_bit_cast: + if (NotPrimaryExpression) + *NotPrimaryExpression = true; Res = ParseBuiltinBitCast(); break; case tok::kw_typeid: + if (NotPrimaryExpression) + *NotPrimaryExpression = true; Res = ParseCXXTypeid(); break; case tok::kw___unique_stable_name: Res = ParseUniqueStableNameExpression(); break; case tok::kw___uuidof: + if (NotPrimaryExpression) + *NotPrimaryExpression = true; Res = ParseCXXUuidof(); break; case tok::kw_this: @@ -1305,6 +1481,10 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, return ExprError(); } + // Everything henceforth is a postfix-expression. + if (NotPrimaryExpression) + *NotPrimaryExpression = true; + if (SavedKind == tok::kw_typename) { // postfix-expression: typename-specifier '(' expression-list[opt] ')' // typename-specifier braced-init-list @@ -1341,8 +1521,9 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, if (TryAnnotateTypeOrScopeToken()) return ExprError(); if (!Tok.is(tok::annot_cxxscope)) - return ParseCastExpression(isUnaryExpression, isAddressOfOperand, - NotCastExpr, isTypeCast); + return ParseCastExpression(ParseKind, isAddressOfOperand, NotCastExpr, + isTypeCast, isVectorLiteral, + NotPrimaryExpression); Token Next = NextToken(); if (Next.is(tok::annot_template_id)) { @@ -1355,8 +1536,9 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, ParseOptionalCXXScopeSpecifier(SS, nullptr, /*EnteringContext=*/false); AnnotateTemplateIdTokenAsType(); - return ParseCastExpression(isUnaryExpression, isAddressOfOperand, - NotCastExpr, isTypeCast); + return ParseCastExpression(ParseKind, isAddressOfOperand, NotCastExpr, + isTypeCast, isVectorLiteral, + NotPrimaryExpression); } } @@ -1372,8 +1554,9 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, // translate it into a type and continue parsing as a cast // expression. AnnotateTemplateIdTokenAsType(); - return ParseCastExpression(isUnaryExpression, isAddressOfOperand, - NotCastExpr, isTypeCast); + return ParseCastExpression(ParseKind, isAddressOfOperand, + NotCastExpr, isTypeCast, isVectorLiteral, + NotPrimaryExpression); } // Fall through to treat the template-id as an id-expression. 
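
The PrimaryExprOnly plumbing threaded through the cases above exists because each atomic constraint in a requires-clause must be a primary-expression. A short sketch of the user-visible effect (illustrative names; not code from the patch):

template <typename T>
concept Four = sizeof(T) == 4;        // '==' is fine inside a concept definition

template <typename T>
void f() requires Four<T>;            // a concept-id is a primary-expression

// template <typename T>
// void g() requires sizeof(T) == 4;  // rejected: not a primary-expression

template <typename T>
void g() requires (sizeof(T) == 4);   // the parenthesized form is accepted
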
@@ -1390,15 +1573,22 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, if (TryAnnotateTypeOrScopeToken()) return ExprError(); if (!Tok.is(tok::coloncolon)) - return ParseCastExpression(isUnaryExpression, isAddressOfOperand); + return ParseCastExpression(ParseKind, isAddressOfOperand, isTypeCast, + isVectorLiteral, NotPrimaryExpression); // ::new -> [C++] new-expression // ::delete -> [C++] delete-expression SourceLocation CCLoc = ConsumeToken(); - if (Tok.is(tok::kw_new)) + if (Tok.is(tok::kw_new)) { + if (NotPrimaryExpression) + *NotPrimaryExpression = true; return ParseCXXNewExpression(true, CCLoc); - if (Tok.is(tok::kw_delete)) + } + if (Tok.is(tok::kw_delete)) { + if (NotPrimaryExpression) + *NotPrimaryExpression = true; return ParseCXXDeleteExpression(true, CCLoc); + } // This is not a type name or scope specifier, it is an invalid expression. Diag(CCLoc, diag::err_expected_expression); @@ -1406,12 +1596,18 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, } case tok::kw_new: // [C++] new-expression + if (NotPrimaryExpression) + *NotPrimaryExpression = true; return ParseCXXNewExpression(false, Tok.getLocation()); case tok::kw_delete: // [C++] delete-expression + if (NotPrimaryExpression) + *NotPrimaryExpression = true; return ParseCXXDeleteExpression(false, Tok.getLocation()); case tok::kw_noexcept: { // [C++0x] 'noexcept' '(' expression ')' + if (NotPrimaryExpression) + *NotPrimaryExpression = true; Diag(Tok, diag::warn_cxx98_compat_noexcept_expr); SourceLocation KeyLoc = ConsumeToken(); BalancedDelimiterTracker T(*this, tok::l_paren); @@ -1440,13 +1636,19 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, case tok::kw___array_rank: case tok::kw___array_extent: + if (NotPrimaryExpression) + *NotPrimaryExpression = true; return ParseArrayTypeTrait(); case tok::kw___is_lvalue_expr: case tok::kw___is_rvalue_expr: + if (NotPrimaryExpression) + *NotPrimaryExpression = true; return ParseExpressionTrait(); case tok::at: { + if (NotPrimaryExpression) + *NotPrimaryExpression = true; SourceLocation AtLoc = ConsumeToken(); return ParseObjCAtExpression(AtLoc); } @@ -1468,8 +1670,13 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, // expression, or we have something that doesn't appear to be a lambda. // If we're in the last case, we fall back to ParseObjCMessageExpression. Res = TryParseLambdaExpression(); - if (!Res.isInvalid() && !Res.get()) + if (!Res.isInvalid() && !Res.get()) { + // We assume Objective-C++ message expressions are not + // primary-expressions. + if (NotPrimaryExpression) + *NotPrimaryExpression = true; Res = ParseObjCMessageExpression(); + } break; } Res = ParseLambdaExpression(); @@ -1489,6 +1696,11 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, // are compiling for OpenCL, we need to return an error as this implies // that the address of the function is being taken, which is illegal in CL. + if (ParseKind == PrimaryExprOnly) + // This is strictly a primary-expression - no postfix-expr pieces should be + // parsed. + return Res; + // These can be followed by postfix-expr pieces. 
PreferredType = SavedType; Res = ParsePostfixExpressionSuffix(Res); @@ -1932,7 +2144,7 @@ Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok, return ExprError(); } - Operand = ParseCastExpression(true/*isUnaryExpression*/); + Operand = ParseCastExpression(UnaryExprOnly); } else { // If it starts with a '(', we know that it is either a parenthesized // type-name, or it is a unary-expression that starts with a compound @@ -2519,8 +2731,8 @@ Parser::ParseParenExpression(ParenParseOption &ExprType, bool stopIfCastExpr, RParenLoc = T.getCloseLocation(); PreferredType.enterTypeCast(Tok.getLocation(), Ty.get().get()); - ExprResult SubExpr = ParseCastExpression(/*isUnaryExpression=*/false); - + ExprResult SubExpr = ParseCastExpression(AnyCastExpr); + if (Ty.isInvalid() || SubExpr.isInvalid()) return ExprError(); @@ -2600,7 +2812,7 @@ Parser::ParseParenExpression(ParenParseOption &ExprType, bool stopIfCastExpr, // Parse the cast-expression that follows it next. // isVectorLiteral = true will make sure we don't parse any // Postfix expression yet - Result = ParseCastExpression(/*isUnaryExpression=*/false, + Result = ParseCastExpression(/*isUnaryExpression=*/AnyCastExpr, /*isAddressOfOperand=*/false, /*isTypeCast=*/IsTypeCast, /*isVectorLiteral=*/true); @@ -2652,7 +2864,7 @@ Parser::ParseParenExpression(ParenParseOption &ExprType, bool stopIfCastExpr, PreferredType.enterTypeCast(Tok.getLocation(), CastTy.get()); // Parse the cast-expression that follows it next. // TODO: For cast expression with CastTy. - Result = ParseCastExpression(/*isUnaryExpression=*/false, + Result = ParseCastExpression(/*isUnaryExpression=*/AnyCastExpr, /*isAddressOfOperand=*/false, /*isTypeCast=*/IsTypeCast); if (!Result.isInvalid()) { diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index a39998482e956..f4ffa08b2a1b7 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -1371,10 +1371,6 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( DeclEndLoc = Range.getEnd(); } - PrototypeScope.Exit(); - - WarnIfHasCUDATargetAttr(); - SourceLocation NoLoc; D.AddTypeInfo(DeclaratorChunk::getFunction( /*HasProto=*/true, @@ -1389,13 +1385,22 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( /*DeclsInPrototype=*/None, LParenLoc, FunLocalRangeEnd, D, TrailingReturnType, &DS), std::move(Attr), DeclEndLoc); + + // Parse requires-clause[opt]. + if (Tok.is(tok::kw_requires)) + ParseTrailingRequiresClause(D); + + PrototypeScope.Exit(); + + WarnIfHasCUDATargetAttr(); } else if (Tok.isOneOf(tok::kw_mutable, tok::arrow, tok::kw___attribute, tok::kw_constexpr, tok::kw_consteval, tok::kw___private, tok::kw___global, tok::kw___local, - tok::kw___constant, tok::kw___generic) || + tok::kw___constant, tok::kw___generic, + tok::kw_requires) || (Tok.is(tok::l_square) && NextToken().is(tok::l_square))) { // It's common to forget that one needs '()' before 'mutable', an attribute - // specifier, or the result type. Deal with this. + // specifier, the result type, or the requires clause. Deal with this. 
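+    // (Illustrative: writing `[] requires true { }` instead of
+    // `[]() requires true { }` lands here; the parser diagnoses the missing
+    // `()` and then continues as if it had been present.)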
unsigned TokKind = 0; switch (Tok.getKind()) { case tok::kw_mutable: TokKind = 0; break; @@ -1409,6 +1414,7 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( case tok::l_square: TokKind = 2; break; case tok::kw_constexpr: TokKind = 3; break; case tok::kw_consteval: TokKind = 4; break; + case tok::kw_requires: TokKind = 5; break; default: llvm_unreachable("Unknown token kind"); } @@ -1440,8 +1446,6 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( DeclEndLoc = Range.getEnd(); } - WarnIfHasCUDATargetAttr(); - SourceLocation NoLoc; D.AddTypeInfo(DeclaratorChunk::getFunction( /*HasProto=*/true, @@ -1462,6 +1466,12 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( /*DeclsInPrototype=*/None, DeclLoc, DeclEndLoc, D, TrailingReturnType), std::move(Attr), DeclEndLoc); + + // Parse the requires-clause, if present. + if (Tok.is(tok::kw_requires)) + ParseTrailingRequiresClause(D); + + WarnIfHasCUDATargetAttr(); } // FIXME: Rename BlockScope -> ClosureScope if we decide to continue using @@ -3238,7 +3248,7 @@ Parser::ParseCXXDeleteExpression(bool UseGlobal, SourceLocation Start) { return ExprError(); } - ExprResult Operand(ParseCastExpression(false)); + ExprResult Operand(ParseCastExpression(AnyCastExpr)); if (Operand.isInvalid()) return Operand; @@ -3469,7 +3479,7 @@ Parser::ParseCXXAmbiguousParenExpression(ParenParseOption &ExprType, // If it is not a cast-expression, NotCastExpr will be true and no token // will be consumed. ColonProt.restore(); - Result = ParseCastExpression(false/*isUnaryExpression*/, + Result = ParseCastExpression(AnyCastExpr, false/*isAddressofOperand*/, NotCastExpr, // type-id has priority. diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 24855df334f4c..1095919baa7d3 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -2264,8 +2264,8 @@ ExprResult Parser::ParseOpenMPParensExpr(StringRef ClauseName, return ExprError(); SourceLocation ELoc = Tok.getLocation(); - ExprResult LHS(ParseCastExpression( - /*isUnaryExpression=*/false, IsAddressOfOperand, NotTypeCast)); + ExprResult LHS(ParseCastExpression(AnyCastExpr, IsAddressOfOperand, + NotTypeCast)); ExprResult Val(ParseRHSOfBinaryExpression(LHS, prec::Conditional)); Val = Actions.ActOnFinishFullExpr(Val.get(), ELoc, /*DiscardedValue*/ false); @@ -2513,7 +2513,7 @@ OMPClause *Parser::ParseOpenMPSingleExprWithArgClause(OpenMPClauseKind Kind, Kind == OMPC_if; if (NeedAnExpression) { SourceLocation ELoc = Tok.getLocation(); - ExprResult LHS(ParseCastExpression(false, false, NotTypeCast)); + ExprResult LHS(ParseCastExpression(AnyCastExpr, false, NotTypeCast)); Val = ParseRHSOfBinaryExpression(LHS, prec::Conditional); Val = Actions.ActOnFinishFullExpr(Val.get(), ELoc, /*DiscardedValue*/ false); diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp index 928bc5aa25b35..35cee596bb016 100644 --- a/clang/lib/Parse/ParseTemplate.cpp +++ b/clang/lib/Parse/ParseTemplate.cpp @@ -130,7 +130,9 @@ Decl *Parser::ParseTemplateDeclarationOrSpecialization( if (TryConsumeToken(tok::kw_requires)) { OptionalRequiresClauseConstraintER = - Actions.CorrectDelayedTyposInExpr(ParseConstraintExpression()); + Actions.CorrectDelayedTyposInExpr( + ParseConstraintLogicalOrExpression( + /*IsTrailingRequiresClause=*/false)); if (!OptionalRequiresClauseConstraintER.isUsable()) { // Skip until the semi-colon or a '}'. 
SkipUntil(tok::r_brace, StopAtSemi | StopBeforeMatch); @@ -254,8 +256,12 @@ Decl *Parser::ParseSingleDeclarationAfterTemplate( }); LateParsedAttrList LateParsedAttrs(true); - if (DeclaratorInfo.isFunctionDeclarator()) + if (DeclaratorInfo.isFunctionDeclarator()) { + if (Tok.is(tok::kw_requires)) + ParseTrailingRequiresClause(DeclaratorInfo); + MaybeParseGNUAttributes(DeclaratorInfo, &LateParsedAttrs); + } if (DeclaratorInfo.isFunctionDeclarator() && isStartOfFunctionDefinition(DeclaratorInfo)) { diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp index 418729a4b2658..9ea800c89a775 100644 --- a/clang/lib/Parse/ParseTentative.cpp +++ b/clang/lib/Parse/ParseTentative.cpp @@ -1031,6 +1031,10 @@ Parser::TPResult Parser::TryParseDeclarator(bool mayBeAbstract, // direct-declarator '[' constant-expression[opt] ']' // direct-abstract-declarator[opt] '[' constant-expression[opt] ']' TPR = TryParseBracketDeclarator(); + } else if (Tok.is(tok::kw_requires)) { + // declarator requires-clause + // A requires clause indicates a function declaration. + TPR = TPResult::True; } else { break; } @@ -2015,7 +2019,6 @@ Parser::TryParseParameterDeclarationClause(bool *InvalidAsDeclaration, /// 'throw' '(' type-id-list[opt] ')' /// Parser::TPResult Parser::TryParseFunctionDeclarator() { - // The '(' is already parsed. TPResult TPR = TryParseParameterDeclarationClause(); diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index 6216206690b0c..a905ebc673056 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -2008,7 +2008,7 @@ static bool fixOverloadedReinterpretCastExpr(Sema &Self, QualType DestType, // No guarantees that ResolveAndFixSingleFunctionTemplateSpecialization // preserves Result. Result = E; - if (!Self.resolveAndFixAddressOfOnlyViableOverloadCandidate( + if (!Self.resolveAndFixAddressOfSingleOverloadCandidate( Result, /*DoFunctionPointerConversion=*/true)) return false; return Result.isUsable(); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 6c54fd1915771..682d2ebf97689 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -340,7 +340,8 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, case Builtin::BI__builtin___strncat_chk: case Builtin::BI__builtin___strncpy_chk: case Builtin::BI__builtin___stpncpy_chk: - case Builtin::BI__builtin___memccpy_chk: { + case Builtin::BI__builtin___memccpy_chk: + case Builtin::BI__builtin___mempcpy_chk: { DiagID = diag::warn_builtin_chk_overflow; IsChkVariant = true; SizeIndex = TheCall->getNumArgs() - 2; @@ -379,7 +380,9 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, case Builtin::BImemmove: case Builtin::BI__builtin_memmove: case Builtin::BImemset: - case Builtin::BI__builtin_memset: { + case Builtin::BI__builtin_memset: + case Builtin::BImempcpy: + case Builtin::BI__builtin_mempcpy: { DiagID = diag::warn_fortify_source_overflow; SizeIndex = TheCall->getNumArgs() - 1; ObjectIndex = 0; diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index 7f0bdc9b47822..018ac2d7dc9d1 100755 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "clang/Sema/SemaConcept.h" #include "clang/Sema/Sema.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/SemaDiagnostic.h" @@ -18,12 +19,16 @@ #include "clang/Sema/Template.h" #include 
"clang/AST/ExprCXX.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Basic/OperatorPrecedence.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerUnion.h" using namespace clang; using namespace sema; -bool Sema::CheckConstraintExpression(Expr *ConstraintExpression) { +bool +Sema::CheckConstraintExpression(Expr *ConstraintExpression, Token NextToken, + bool *PossibleNonPrimary, + bool IsTrailingRequiresClause) { // C++2a [temp.constr.atomic]p1 // ..E shall be a constant expression of type bool. @@ -31,22 +36,56 @@ bool Sema::CheckConstraintExpression(Expr *ConstraintExpression) { if (auto *BinOp = dyn_cast(ConstraintExpression)) { if (BinOp->getOpcode() == BO_LAnd || BinOp->getOpcode() == BO_LOr) - return CheckConstraintExpression(BinOp->getLHS()) && - CheckConstraintExpression(BinOp->getRHS()); + return CheckConstraintExpression(BinOp->getLHS(), NextToken, + PossibleNonPrimary) && + CheckConstraintExpression(BinOp->getRHS(), NextToken, + PossibleNonPrimary); } else if (auto *C = dyn_cast(ConstraintExpression)) - return CheckConstraintExpression(C->getSubExpr()); + return CheckConstraintExpression(C->getSubExpr(), NextToken, + PossibleNonPrimary); + + QualType Type = ConstraintExpression->getType(); + + auto CheckForNonPrimary = [&] { + if (PossibleNonPrimary) + *PossibleNonPrimary = + // We have the following case: + // template requires func(0) struct S { }; + // The user probably isn't aware of the parentheses required around + // the function call, and we're only going to parse 'func' as the + // primary-expression, and complain that it is of non-bool type. + (NextToken.is(tok::l_paren) && + (IsTrailingRequiresClause || + (Type->isDependentType() && + IsDependentFunctionNameExpr(ConstraintExpression)) || + Type->isFunctionType() || + Type->isSpecificBuiltinType(BuiltinType::Overload))) || + // We have the following case: + // template requires size_ == 0 struct S { }; + // The user probably isn't aware of the parentheses required around + // the binary operator, and we're only going to parse 'func' as the + // first operand, and complain that it is of non-bool type. + getBinOpPrecedence(NextToken.getKind(), + /*GreaterThanIsOperator=*/true, + getLangOpts().CPlusPlus11) > prec::LogicalAnd; + }; // An atomic constraint! 
- if (ConstraintExpression->isTypeDependent()) + if (ConstraintExpression->isTypeDependent()) { + CheckForNonPrimary(); return true; + } - QualType Type = ConstraintExpression->getType(); if (!Context.hasSameUnqualifiedType(Type, Context.BoolTy)) { Diag(ConstraintExpression->getExprLoc(), diag::err_non_bool_atomic_constraint) << Type << ConstraintExpression->getSourceRange(); + CheckForNonPrimary(); return false; } + + if (PossibleNonPrimary) + *PossibleNonPrimary = false; return true; } @@ -417,123 +456,25 @@ void Sema::DiagnoseUnsatisfiedConstraint( } } -namespace { -struct AtomicConstraint { - const Expr *ConstraintExpr; - llvm::Optional> ParameterMapping; - - AtomicConstraint(Sema &S, const Expr *ConstraintExpr) : - ConstraintExpr(ConstraintExpr) { }; - - bool hasMatchingParameterMapping(ASTContext &C, - const AtomicConstraint &Other) const { - if (!ParameterMapping != !Other.ParameterMapping) - return false; - if (!ParameterMapping) - return true; - if (ParameterMapping->size() != Other.ParameterMapping->size()) - return false; - - for (unsigned I = 0, S = ParameterMapping->size(); I < S; ++I) - if (!C.getCanonicalTemplateArgument((*ParameterMapping)[I].getArgument()) - .structurallyEquals(C.getCanonicalTemplateArgument( - (*Other.ParameterMapping)[I].getArgument()))) - return false; - return true; - } - - bool subsumes(ASTContext &C, const AtomicConstraint &Other) const { - // C++ [temp.constr.order] p2 - // - an atomic constraint A subsumes another atomic constraint B - // if and only if the A and B are identical [...] - // - // C++ [temp.constr.atomic] p2 - // Two atomic constraints are identical if they are formed from the - // same expression and the targets of the parameter mappings are - // equivalent according to the rules for expressions [...] - - // We do not actually substitute the parameter mappings into the - // constraint expressions, therefore the constraint expressions are - // the originals, and comparing them will suffice. - if (ConstraintExpr != Other.ConstraintExpr) - return false; - - // Check that the parameter lists are identical - return hasMatchingParameterMapping(C, Other); - } -}; - -/// \brief A normalized constraint, as defined in C++ [temp.constr.normal], is -/// either an atomic constraint, a conjunction of normalized constraints or a -/// disjunction of normalized constraints. 
-struct NormalizedConstraint { - enum CompoundConstraintKind { CCK_Conjunction, CCK_Disjunction }; - - using CompoundConstraint = llvm::PointerIntPair< - std::pair *, 1, - CompoundConstraintKind>; - - llvm::PointerUnion Constraint; - - NormalizedConstraint(AtomicConstraint *C): Constraint{C} { }; - NormalizedConstraint(ASTContext &C, NormalizedConstraint LHS, - NormalizedConstraint RHS, CompoundConstraintKind Kind) - : Constraint{CompoundConstraint{ - new (C) std::pair{LHS, - RHS}, - Kind}} { }; - - CompoundConstraintKind getCompoundKind() const { - assert(!isAtomic() && "getCompoundKind called on atomic constraint."); - return Constraint.get().getInt(); - } - - bool isAtomic() const { return Constraint.is(); } - - NormalizedConstraint &getLHS() const { - assert(!isAtomic() && "getLHS called on atomic constraint."); - return Constraint.get().getPointer()->first; - } - - NormalizedConstraint &getRHS() const { - assert(!isAtomic() && "getRHS called on atomic constraint."); - return Constraint.get().getPointer()->second; +const NormalizedConstraint * +Sema::getNormalizedAssociatedConstraints( + NamedDecl *ConstrainedDecl, ArrayRef AssociatedConstraints) { + auto CacheEntry = NormalizationCache.find(ConstrainedDecl); + if (CacheEntry == NormalizationCache.end()) { + auto Normalized = + NormalizedConstraint::fromConstraintExprs(*this, ConstrainedDecl, + AssociatedConstraints); + CacheEntry = + NormalizationCache + .try_emplace(ConstrainedDecl, + Normalized + ? new (Context) NormalizedConstraint( + std::move(*Normalized)) + : nullptr) + .first; } - - AtomicConstraint *getAtomicConstraint() const { - assert(isAtomic() && - "getAtomicConstraint called on non-atomic constraint."); - return Constraint.get(); - } - - static llvm::Optional - fromConstraintExprs(Sema &S, NamedDecl *D, ArrayRef E) { - assert(E.size() != 0); - auto First = fromConstraintExpr(S, D, E[0]); - if (E.size() == 1) - return First; - auto Second = fromConstraintExpr(S, D, E[1]); - if (!Second) - return llvm::Optional{}; - llvm::Optional Conjunction; - Conjunction.emplace(S.Context, std::move(*First), std::move(*Second), - CCK_Conjunction); - for (unsigned I = 2; I < E.size(); ++I) { - auto Next = fromConstraintExpr(S, D, E[I]); - if (!Next) - return llvm::Optional{}; - NormalizedConstraint NewConjunction(S.Context, std::move(*Conjunction), - std::move(*Next), CCK_Conjunction); - *Conjunction = std::move(NewConjunction); - } - return Conjunction; - } - -private: - static llvm::Optional fromConstraintExpr(Sema &S, - NamedDecl *D, - const Expr *E); -}; + return CacheEntry->second; +} static bool substituteParameterMappings(Sema &S, NormalizedConstraint &N, ConceptDecl *Concept, ArrayRef TemplateArgs, @@ -555,11 +496,13 @@ static bool substituteParameterMappings(Sema &S, NormalizedConstraint &N, llvm::SmallBitVector OccurringIndices(TemplateParams->size()); S.MarkUsedTemplateParameters(Atomic.ConstraintExpr, /*OnlyDeduced=*/false, /*Depth=*/0, OccurringIndices); - Atomic.ParameterMapping.emplace(); - Atomic.ParameterMapping->reserve(OccurringIndices.size()); - for (unsigned I = 0, C = TemplateParams->size(); I != C; ++I) + Atomic.ParameterMapping.emplace( + MutableArrayRef( + new (S.Context) TemplateArgumentLoc[OccurringIndices.count()], + OccurringIndices.count())); + for (unsigned I = 0, J = 0, C = TemplateParams->size(); I != C; ++I) if (OccurringIndices[I]) - Atomic.ParameterMapping->push_back( + new (&(*Atomic.ParameterMapping)[J++]) TemplateArgumentLoc( S.getIdentityTemplateArgumentLoc(TemplateParams->begin()[I], // Here we 
assume we do not support things like // template @@ -585,6 +528,30 @@ static bool substituteParameterMappings(Sema &S, NormalizedConstraint &N, return false; } +Optional +NormalizedConstraint::fromConstraintExprs(Sema &S, NamedDecl *D, + ArrayRef E) { + assert(E.size() != 0); + auto First = fromConstraintExpr(S, D, E[0]); + if (E.size() == 1) + return First; + auto Second = fromConstraintExpr(S, D, E[1]); + if (!Second) + return None; + llvm::Optional Conjunction; + Conjunction.emplace(S.Context, std::move(*First), std::move(*Second), + CCK_Conjunction); + for (unsigned I = 2; I < E.size(); ++I) { + auto Next = fromConstraintExpr(S, D, E[I]); + if (!Next) + return llvm::Optional{}; + NormalizedConstraint NewConjunction(S.Context, std::move(*Conjunction), + std::move(*Next), CCK_Conjunction); + *Conjunction = std::move(NewConjunction); + } + return Conjunction; +} + llvm::Optional NormalizedConstraint::fromConstraintExpr(Sema &S, NamedDecl *D, const Expr *E) { assert(E != nullptr); @@ -604,11 +571,11 @@ NormalizedConstraint::fromConstraintExpr(Sema &S, NamedDecl *D, const Expr *E) { return None; return NormalizedConstraint( - S.Context, *LHS, *RHS, + S.Context, std::move(*LHS), std::move(*RHS), BO->getOpcode() == BO_LAnd ? CCK_Conjunction : CCK_Disjunction); } } else if (auto *CSE = dyn_cast(E)) { - Optional SubNF; + const NormalizedConstraint *SubNF; { Sema::InstantiatingTemplate Inst( S, CSE->getExprLoc(), @@ -623,24 +590,26 @@ NormalizedConstraint::fromConstraintExpr(Sema &S, NamedDecl *D, const Expr *E) { // constraint. If any such substitution results in an invalid type or // expression, the program is ill-formed; no diagnostic is required. // [...] - SubNF = fromConstraintExpr(S, CSE->getNamedConcept(), - CSE->getNamedConcept()->getConstraintExpr()); + ConceptDecl *CD = CSE->getNamedConcept(); + SubNF = S.getNormalizedAssociatedConstraints(CD, + {CD->getConstraintExpr()}); if (!SubNF) return None; } + Optional New; + New.emplace(S.Context, *SubNF); + if (substituteParameterMappings( - S, *SubNF, CSE->getNamedConcept(), + S, *New, CSE->getNamedConcept(), CSE->getTemplateArguments(), CSE->getTemplateArgsAsWritten())) return None; - return SubNF; + return New; } return NormalizedConstraint{new (S.Context) AtomicConstraint(S, E)}; } -} // namespace - using NormalForm = llvm::SmallVector, 4>; @@ -703,22 +672,9 @@ static NormalForm makeDNF(const NormalizedConstraint &Normalized) { return Res; } -static bool subsumes(Sema &S, NamedDecl *DP, ArrayRef P, - NamedDecl *DQ, ArrayRef Q, bool &Subsumes) { - // C++ [temp.constr.order] p2 - // In order to determine if a constraint P subsumes a constraint Q, P is - // transformed into disjunctive normal form, and Q is transformed into - // conjunctive normal form. [...] 
- auto PNormalized = NormalizedConstraint::fromConstraintExprs(S, DP, P); - if (!PNormalized) - return true; - const NormalForm PDNF = makeDNF(*PNormalized); - - auto QNormalized = NormalizedConstraint::fromConstraintExprs(S, DQ, Q); - if (!QNormalized) - return true; - const NormalForm QCNF = makeCNF(*QNormalized); - +template +static bool subsumes(NormalForm PDNF, NormalForm QCNF, + AtomicSubsumptionEvaluator E) { // C++ [temp.constr.order] p2 // Then, P subsumes Q if and only if, for every disjunctive clause Pi in the // disjunctive normal form of P, Pi subsumes every conjunctive clause Qj in @@ -733,7 +689,7 @@ static bool subsumes(Sema &S, NamedDecl *DP, ArrayRef P, bool Found = false; for (const AtomicConstraint *Pia : Pi) { for (const AtomicConstraint *Qjb : Qj) { - if (Pia->subsumes(S.Context, *Qjb)) { + if (E(*Pia, *Qjb)) { Found = true; break; } @@ -741,13 +697,32 @@ static bool subsumes(Sema &S, NamedDecl *DP, ArrayRef P, if (Found) break; } - if (!Found) { - Subsumes = false; + if (!Found) return false; - } } } - Subsumes = true; + return true; +} + +template +static bool subsumes(Sema &S, NamedDecl *DP, ArrayRef P, + NamedDecl *DQ, ArrayRef Q, bool &Subsumes, + AtomicSubsumptionEvaluator E) { + // C++ [temp.constr.order] p2 + // In order to determine if a constraint P subsumes a constraint Q, P is + // transformed into disjunctive normal form, and Q is transformed into + // conjunctive normal form. [...] + auto *PNormalized = S.getNormalizedAssociatedConstraints(DP, P); + if (!PNormalized) + return true; + const NormalForm PDNF = makeDNF(*PNormalized); + + auto *QNormalized = S.getNormalizedAssociatedConstraints(DQ, Q); + if (!QNormalized) + return true; + const NormalForm QCNF = makeCNF(*QNormalized); + + Subsumes = subsumes(PDNF, QCNF, E); return false; } @@ -770,8 +745,84 @@ bool Sema::IsAtLeastAsConstrained(NamedDecl *D1, ArrayRef AC1, Result = CacheEntry->second; return false; } - if (subsumes(*this, D1, AC1, D2, AC2, Result)) + + if (subsumes(*this, D1, AC1, D2, AC2, Result, + [this] (const AtomicConstraint &A, const AtomicConstraint &B) { + return A.subsumes(Context, B); + })) return true; SubsumptionCache.try_emplace(Key, Result); return false; -} \ No newline at end of file +} + +bool Sema::MaybeEmitAmbiguousAtomicConstraintsDiagnostic(NamedDecl *D1, + ArrayRef AC1, NamedDecl *D2, ArrayRef AC2) { + if (isSFINAEContext()) + // No need to work here because our notes would be discarded. + return false; + + if (AC1.empty() || AC2.empty()) + return false; + + auto NormalExprEvaluator = + [this] (const AtomicConstraint &A, const AtomicConstraint &B) { + return A.subsumes(Context, B); + }; + + const Expr *AmbiguousAtomic1 = nullptr, *AmbiguousAtomic2 = nullptr; + auto IdenticalExprEvaluator = + [&] (const AtomicConstraint &A, const AtomicConstraint &B) { + if (!A.hasMatchingParameterMapping(Context, B)) + return false; + const Expr *EA = A.ConstraintExpr, *EB = B.ConstraintExpr; + if (EA == EB) + return true; + + // Not the same source level expression - are the expressions + // identical? 
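// For example (illustrative only):
//   template<typename T> void f() requires (sizeof(T) > 4);          // #1
//   template<typename T> void f() requires (sizeof(T) > 4) && true;  // #2
// The two 'sizeof(T) > 4' atoms are distinct Expr nodes from distinct
// declarations, so they are not 'identical' for subsumption purposes, yet
// they profile to the same canonical form - exactly the situation this
// evaluator detects below.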
+ llvm::FoldingSetNodeID IDA, IDB; + EA->Profile(IDA, Context, /*Canonical=*/true); + EB->Profile(IDB, Context, /*Canonical=*/true); + if (IDA != IDB) + return false; + + AmbiguousAtomic1 = EA; + AmbiguousAtomic2 = EB; + return true; + }; + + { + // The subsumption checks might cause diagnostics. + SFINAETrap Trap(*this); + auto *Normalized1 = getNormalizedAssociatedConstraints(D1, AC1); + if (!Normalized1) + return false; + const NormalForm DNF1 = makeDNF(*Normalized1); + const NormalForm CNF1 = makeCNF(*Normalized1); + + auto *Normalized2 = getNormalizedAssociatedConstraints(D2, AC2); + if (!Normalized2) + return false; + const NormalForm DNF2 = makeDNF(*Normalized2); + const NormalForm CNF2 = makeCNF(*Normalized2); + + bool Is1AtLeastAs2Normally = subsumes(DNF1, CNF2, NormalExprEvaluator); + bool Is2AtLeastAs1Normally = subsumes(DNF2, CNF1, NormalExprEvaluator); + bool Is1AtLeastAs2 = subsumes(DNF1, CNF2, IdenticalExprEvaluator); + bool Is2AtLeastAs1 = subsumes(DNF2, CNF1, IdenticalExprEvaluator); + if (Is1AtLeastAs2 == Is1AtLeastAs2Normally && + Is2AtLeastAs1 == Is2AtLeastAs1Normally) + // Same result - no ambiguity was caused by identical atomic expressions. + return false; + } + + // A different result! Some ambiguous atomic constraint(s) caused a difference. + assert(AmbiguousAtomic1 && AmbiguousAtomic2); + + Diag(AmbiguousAtomic1->getBeginLoc(), diag::note_ambiguous_atomic_constraints) + << AmbiguousAtomic1->getSourceRange(); + Diag(AmbiguousAtomic2->getBeginLoc(), + diag::note_ambiguous_atomic_constraints_similar_expression) + << AmbiguousAtomic2->getSourceRange(); + return true; +} diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index 271c4a10f3e44..6dc9e342beb92 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -1228,7 +1228,7 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { return false; if (RequiresNoThrowAlloc) { - const auto *FT = OperatorNew->getType()->getAs<FunctionProtoType>(); + const auto *FT = OperatorNew->getType()->castAs<FunctionProtoType>(); if (!FT->isNothrow(/*ResultIfDependent*/ false)) { S.Diag(OperatorNew->getLocation(), diag::err_coroutine_promise_new_requires_nothrow) @@ -1281,7 +1281,7 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { // Check if we need to pass the size. const auto *OpDeleteType = - OpDeleteQualType.getTypePtr()->getAs<FunctionProtoType>(); + OpDeleteQualType.getTypePtr()->castAs<FunctionProtoType>(); if (OpDeleteType->getNumParams() > 1) DeleteArgs.push_back(FrameSize); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 51cc184f4d015..26b3a01b138c0 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -7902,7 +7902,13 @@ struct FindOverriddenMethod { Path.Decls = Path.Decls.slice(1)) { NamedDecl *D = Path.Decls.front(); if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(D)) { - if (MD->isVirtual() && !S->IsOverload(Method, MD, false)) + if (MD->isVirtual() && + !S->IsOverload( + Method, MD, /*UseMemberUsingDeclRules=*/false, + /*ConsiderCudaAttrs=*/true, + // C++2a [class.virtual]p2 does not consider requires clauses + // when overriding.
+ /*ConsiderRequiresClauses=*/false)) return true; } } @@ -8247,7 +8253,8 @@ static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D, NewFD = FunctionDecl::Create(SemaRef.Context, DC, D.getBeginLoc(), NameInfo, R, TInfo, SC, isInline, HasPrototype, - CSK_unspecified); + CSK_unspecified, + /*TrailingRequiresClause=*/nullptr); if (D.isInvalidType()) NewFD->setInvalidDecl(); @@ -8264,6 +8271,7 @@ static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D, ConstexprKind = CSK_unspecified; D.getMutableDeclSpec().ClearConstexprSpec(); } + Expr *TrailingRequiresClause = D.getTrailingRequiresClause(); // Check that the return type is not an abstract class type. // For record types, this is done by the AbstractClassUsageDiagnoser once @@ -8283,7 +8291,8 @@ static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D, return CXXConstructorDecl::Create( SemaRef.Context, cast(DC), D.getBeginLoc(), NameInfo, R, TInfo, ExplicitSpecifier, isInline, - /*isImplicitlyDeclared=*/false, ConstexprKind); + /*isImplicitlyDeclared=*/false, ConstexprKind, InheritedConstructor(), + TrailingRequiresClause); } else if (Name.getNameKind() == DeclarationName::CXXDestructorName) { // This is a C++ destructor declaration. @@ -8292,8 +8301,8 @@ static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D, CXXRecordDecl *Record = cast(DC); CXXDestructorDecl *NewDD = CXXDestructorDecl::Create( SemaRef.Context, Record, D.getBeginLoc(), NameInfo, R, TInfo, - isInline, - /*isImplicitlyDeclared=*/false, ConstexprKind); + isInline, /*isImplicitlyDeclared=*/false, ConstexprKind, + TrailingRequiresClause); // If the destructor needs an implicit exception specification, set it // now. FIXME: It'd be nice to be able to create the right type to start @@ -8313,7 +8322,8 @@ static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D, return FunctionDecl::Create(SemaRef.Context, DC, D.getBeginLoc(), D.getIdentifierLoc(), Name, R, TInfo, SC, isInline, - /*hasPrototype=*/true, ConstexprKind); + /*hasPrototype=*/true, ConstexprKind, + TrailingRequiresClause); } } else if (Name.getNameKind() == DeclarationName::CXXConversionFunctionName) { @@ -8330,9 +8340,14 @@ static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D, IsVirtualOkay = true; return CXXConversionDecl::Create( SemaRef.Context, cast(DC), D.getBeginLoc(), NameInfo, R, - TInfo, isInline, ExplicitSpecifier, ConstexprKind, SourceLocation()); + TInfo, isInline, ExplicitSpecifier, ConstexprKind, SourceLocation(), + TrailingRequiresClause); } else if (Name.getNameKind() == DeclarationName::CXXDeductionGuideName) { + if (TrailingRequiresClause) + SemaRef.Diag(TrailingRequiresClause->getBeginLoc(), + diag::err_trailing_requires_clause_on_deduction_guide) + << TrailingRequiresClause->getSourceRange(); SemaRef.CheckDeductionGuideDeclarator(D, R, SC); return CXXDeductionGuideDecl::Create(SemaRef.Context, DC, D.getBeginLoc(), @@ -8354,7 +8369,8 @@ static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D, // This is a C++ method declaration. 
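// Illustrative of the new deduction-guide diagnostic above (mirrors the
// dcl.decl/p3.cpp test added later in this patch):
//   template<typename T> struct R { R(T); };
//   template<typename T> R(T) -> R<T> requires true;  // error: deduction guide
//                                                     // cannot have a requires clause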
CXXMethodDecl *Ret = CXXMethodDecl::Create( SemaRef.Context, cast(DC), D.getBeginLoc(), NameInfo, R, - TInfo, SC, isInline, ConstexprKind, SourceLocation()); + TInfo, SC, isInline, ConstexprKind, SourceLocation(), + TrailingRequiresClause); IsVirtualOkay = !Ret->isStatic(); return Ret; } else { @@ -8368,7 +8384,7 @@ static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D, // - we're in C++ (where every function has a prototype), return FunctionDecl::Create(SemaRef.Context, DC, D.getBeginLoc(), NameInfo, R, TInfo, SC, isInline, true /*HasPrototype*/, - ConstexprKind); + ConstexprKind, TrailingRequiresClause); } } @@ -10575,6 +10591,11 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD, } } } + if (Method->isVirtual() && NewFD->getTrailingRequiresClause()) + // C++2a [class.virtual]p6 + // A virtual method shall not have a requires-clause. + Diag(NewFD->getTrailingRequiresClause()->getBeginLoc(), + diag::err_constrained_virtual_method); if (Method->isStatic()) checkThisInStaticMemberFunctionType(Method); diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index de3d44eb1f0f8..1b156bdfc9a4f 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -8199,7 +8199,8 @@ NamedDecl * Sema::DeclClonePragmaWeak(NamedDecl *ND, IdentifierInfo *II, NewFD = FunctionDecl::Create( FD->getASTContext(), FD->getDeclContext(), Loc, Loc, DeclarationName(II), FD->getType(), FD->getTypeSourceInfo(), SC_None, - false /*isInlineSpecified*/, FD->hasPrototype(), CSK_unspecified); + false /*isInlineSpecified*/, FD->hasPrototype(), CSK_unspecified, + FD->getTrailingRequiresClause()); NewD = NewFD; if (FD->getQualifier()) diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 6321a28bf25cb..34137657a919e 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -3868,6 +3868,26 @@ void Sema::ActOnStartCXXInClassMemberInitializer() { PushFunctionScope(); } +void Sema::ActOnStartTrailingRequiresClause(Scope *S, Declarator &D) { + if (!D.isFunctionDeclarator()) + return; + auto &FTI = D.getFunctionTypeInfo(); + if (!FTI.Params) + return; + for (auto &Param : ArrayRef(FTI.Params, + FTI.NumParams)) { + auto *ParamDecl = cast(Param.Param); + if (ParamDecl->getDeclName()) + PushOnScopeChains(ParamDecl, S, /*AddToContext=*/false); + } +} + +ExprResult Sema::ActOnFinishTrailingRequiresClause(ExprResult ConstraintExpr) { + if (ConstraintExpr.isInvalid()) + return ExprError(); + return CorrectDelayedTyposInExpr(ConstraintExpr); +} + /// This is invoked after parsing an in-class initializer for a /// non-static C++ class member, and after instantiating an in-class initializer /// in a class template. Such actions are deferred until the class is complete. @@ -12702,7 +12722,8 @@ Sema::findInheritingConstructor(SourceLocation Loc, BaseCtor->getExplicitSpecifier(), /*isInline=*/true, /*isImplicitlyDeclared=*/true, Constexpr ? 
BaseCtor->getConstexprKind() : CSK_unspecified, - InheritedConstructor(Shadow, BaseCtor)); + InheritedConstructor(Shadow, BaseCtor), + BaseCtor->getTrailingRequiresClause()); if (Shadow->isInvalidDecl()) DerivedCtor->setInvalidDecl(); @@ -17094,6 +17115,11 @@ bool Sema::checkThisInStaticMemberFunctionType(CXXMethodDecl *Method) { if (checkThisInStaticMemberFunctionExceptionSpec(Method)) return true; + // Check the trailing requires clause + if (Expr *E = Method->getTrailingRequiresClause()) + if (!Finder.TraverseStmt(E)) + return true; + return checkThisInStaticMemberFunctionAttributes(Method); } diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 1b563406552cd..ac2a9e1cff08e 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -329,6 +329,30 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef Locs, diagnoseUseOfInternalDeclInInlineFunction(*this, D, Loc); + // [expr.prim.id]p4 + // A program that refers explicitly or implicitly to a function with a + // trailing requires-clause whose constraint-expression is not satisfied, + // other than to declare it, is ill-formed. [...] + // + // See if this is a function with constraints that need to be satisfied. + if (FunctionDecl *FD = dyn_cast(D)) { + if (Expr *RC = FD->getTrailingRequiresClause()) { + ConstraintSatisfaction Satisfaction; + bool Failed = CheckConstraintSatisfaction(RC, Satisfaction); + if (Failed) + // A diagnostic will have already been generated (non-constant + // constraint expression, for example) + return true; + if (!Satisfaction.IsSatisfied) { + Diag(Loc, + diag::err_reference_to_function_with_unsatisfied_constraints) + << D; + DiagnoseUnsatisfiedConstraint(Satisfaction); + return true; + } + } + } + return false; } @@ -18051,7 +18075,7 @@ ExprResult Sema::CheckPlaceholderExpr(Expr *E) { // No guarantees that ResolveAndFixSingleFunctionTemplateSpecialization // leaves Result unchanged on failure. Result = E; - if (resolveAndFixAddressOfOnlyViableOverloadCandidate(Result)) + if (resolveAndFixAddressOfSingleOverloadCandidate(Result)) return Result; // If that failed, try to recover with a call. 
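// Sketch of the [expr.prim.id]p4 check added to DiagnoseUseOfDecl above
// (illustrative, assuming -std=c++2a -fconcepts-ts):
//   void f(int) requires false;
//   void (*p)(int) = f;  // error: invalid reference to function 'f':
//                        // constraints not satisfied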
@@ -18188,3 +18212,8 @@ ExprResult Sema::ActOnObjCAvailabilityCheckExpr( return new (Context) ObjCAvailabilityCheckExpr(Version, AtLoc, RParen, Context.BoolTy); } + +bool Sema::IsDependentFunctionNameExpr(Expr *E) { + assert(E->isTypeDependent()); + return isa<UnresolvedLookupExpr>(E); +} diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp index d09a3377d2b03..c2d14a44f53d4 100644 --- a/clang/lib/Sema/SemaLambda.cpp +++ b/clang/lib/Sema/SemaLambda.cpp @@ -361,7 +361,8 @@ CXXMethodDecl *Sema::startLambdaDefinition(CXXRecordDecl *Class, TypeSourceInfo *MethodTypeInfo, SourceLocation EndLoc, ArrayRef<ParmVarDecl *> Params, - ConstexprSpecKind ConstexprKind) { + ConstexprSpecKind ConstexprKind, + Expr *TrailingRequiresClause) { QualType MethodType = MethodTypeInfo->getType(); TemplateParameterList *TemplateParams = getGenericLambdaTemplateParameterList(getCurLambda(), *this); @@ -395,7 +396,7 @@ CXXMethodDecl *Sema::startLambdaDefinition(CXXRecordDecl *Class, DeclarationNameInfo(MethodName, IntroducerRange.getBegin(), MethodNameLoc), MethodType, MethodTypeInfo, SC_None, - /*isInline=*/true, ConstexprKind, EndLoc); + /*isInline=*/true, ConstexprKind, EndLoc, TrailingRequiresClause); Method->setAccess(AS_public); if (!TemplateParams) Class->addDecl(Method); @@ -972,7 +973,8 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro, KnownDependent, Intro.Default); CXXMethodDecl *Method = startLambdaDefinition(Class, Intro.Range, MethodTyInfo, EndLoc, Params, - ParamInfo.getDeclSpec().getConstexprSpecifier()); + ParamInfo.getDeclSpec().getConstexprSpecifier(), + ParamInfo.getTrailingRequiresClause()); if (ExplicitParams) CheckCXXDefaultArguments(Method); diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 1f77f2b9342cf..5fb59b545176d 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -1137,7 +1137,8 @@ Sema::CheckOverload(Scope *S, FunctionDecl *New, const LookupResult &Old, } bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old, - bool UseMemberUsingDeclRules, bool ConsiderCudaAttrs) { + bool UseMemberUsingDeclRules, bool ConsiderCudaAttrs, + bool ConsiderRequiresClauses) { // C++ [basic.start.main]p2: This function shall not be overloaded. if (New->isMain()) return false; @@ -1273,23 +1274,38 @@ bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old, if (getLangOpts().CUDA && ConsiderCudaAttrs) { // Don't allow overloading of destructors. (In theory we could, but it // would be a giant change to clang.) - if (isa<CXXDestructorDecl>(New)) - return false; - - CUDAFunctionTarget NewTarget = IdentifyCUDATarget(New), - OldTarget = IdentifyCUDATarget(Old); - if (NewTarget == CFT_InvalidTarget) - return false; + if (!isa<CXXDestructorDecl>(New)) { + CUDAFunctionTarget NewTarget = IdentifyCUDATarget(New), + OldTarget = IdentifyCUDATarget(Old); + if (NewTarget != CFT_InvalidTarget) { + assert((OldTarget != CFT_InvalidTarget) && + "Unexpected invalid target."); + + // Allow overloading of functions with same signature and different CUDA + // target attributes. + if (NewTarget != OldTarget) + return true; + } + } + } - assert((OldTarget != CFT_InvalidTarget) && "Unexpected invalid target."); + if (ConsiderRequiresClauses) { + Expr *NewRC = New->getTrailingRequiresClause(), + *OldRC = Old->getTrailingRequiresClause(); + if ((NewRC != nullptr) != (OldRC != nullptr)) + // RCs are most certainly different - these are overloads. + return true; - // Allow overloading of functions with same signature and different CUDA - // target attributes.
- return NewTarget != OldTarget; + if (NewRC) { + llvm::FoldingSetNodeID NewID, OldID; + NewRC->Profile(NewID, Context, /*Canonical=*/true); + OldRC->Profile(OldID, Context, /*Canonical=*/true); + if (NewID != OldID) + // RCs are not equivalent - these are overloads. + return true; + } } - // TODO: Concepts: Check function trailing requires clauses here. - // The signatures match; this is not an overload. return false; } @@ -6258,6 +6274,16 @@ void Sema::AddOverloadCandidate( return; } + if (Expr *RequiresClause = Function->getTrailingRequiresClause()) { + ConstraintSatisfaction Satisfaction; + if (CheckConstraintSatisfaction(RequiresClause, Satisfaction) || + !Satisfaction.IsSatisfied) { + Candidate.Viable = false; + Candidate.FailureKind = ovl_fail_constraints_not_satisfied; + return; + } + } + // Determine the implicit conversion sequences for each of the // arguments. for (unsigned ArgIdx = 0; ArgIdx < Args.size(); ++ArgIdx) { @@ -6774,6 +6800,16 @@ Sema::AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl, return; } + if (Expr *RequiresClause = Method->getTrailingRequiresClause()) { + ConstraintSatisfaction Satisfaction; + if (CheckConstraintSatisfaction(RequiresClause, Satisfaction) || + !Satisfaction.IsSatisfied) { + Candidate.Viable = false; + Candidate.FailureKind = ovl_fail_constraints_not_satisfied; + return; + } + } + // Determine the implicit conversion sequences for each of the // arguments. for (unsigned ArgIdx = 0; ArgIdx < Args.size(); ++ArgIdx) { @@ -7130,6 +7166,17 @@ void Sema::AddConversionCandidate( return; } + Expr *RequiresClause = Conversion->getTrailingRequiresClause(); + if (RequiresClause) { + ConstraintSatisfaction Satisfaction; + if (CheckConstraintSatisfaction(RequiresClause, Satisfaction) || + !Satisfaction.IsSatisfied) { + Candidate.Viable = false; + Candidate.FailureKind = ovl_fail_constraints_not_satisfied; + return; + } + } + // We won't go through a user-defined type conversion function to convert a // derived to base as such conversions are given Conversion Rank. They only // go through a copy constructor. 13.3.3.1.2-p4 [over.ics.user] @@ -9461,6 +9508,35 @@ bool clang::isBetterOverloadCandidate( return BetterTemplate == Cand1.Function->getPrimaryTemplate(); } + // -- F1 and F2 are non-template functions with the same + // parameter-type-lists, and F1 is more constrained than F2 [...], + if (Cand1.Function && Cand2.Function && !Cand1IsSpecialization && + !Cand2IsSpecialization && Cand1.Function->hasPrototype() && + Cand2.Function->hasPrototype()) { + auto *PT1 = cast<FunctionProtoType>(Cand1.Function->getFunctionType()); + auto *PT2 = cast<FunctionProtoType>(Cand2.Function->getFunctionType()); + if (PT1->getNumParams() == PT2->getNumParams() && + PT1->isVariadic() == PT2->isVariadic() && + S.FunctionParamTypesAreEqual(PT1, PT2)) { + Expr *RC1 = Cand1.Function->getTrailingRequiresClause(); + Expr *RC2 = Cand2.Function->getTrailingRequiresClause(); + if (RC1 && RC2) { + bool AtLeastAsConstrained1, AtLeastAsConstrained2; + if (S.IsAtLeastAsConstrained(Cand1.Function, {RC1}, Cand2.Function, + {RC2}, AtLeastAsConstrained1)) + return false; + if (!AtLeastAsConstrained1) + return false; + if (S.IsAtLeastAsConstrained(Cand2.Function, {RC2}, Cand1.Function, + {RC1}, AtLeastAsConstrained2)) + return false; + if (!AtLeastAsConstrained2) + return true; + } else if (RC1 || RC2) + return RC1 != nullptr; + } + } + // -- F1 is a constructor for a class D, F2 is a constructor for a base // class B of D, and for all arguments the corresponding parameters of // F1 and F2 have the same type.
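// Hypothetical usage of the new tiebreaker (names are assumptions, not from
// the patch):
//   template<typename T> concept Small = sizeof(T) <= 8;
//   template<typename T> struct Codec {
//     void encode();                    // #1
//     void encode() requires Small<T>;  // #2
//   };
//   Codec<int>{}.encode();  // both are viable non-template functions with the
//                           // same parameter-type-list; #2 carries a
//                           // requires-clause, so it is the better candidate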
@@ -9829,6 +9905,24 @@ static bool checkAddressOfFunctionIsAvailable(Sema &S, const FunctionDecl *FD, return false; } + if (const Expr *RC = FD->getTrailingRequiresClause()) { + ConstraintSatisfaction Satisfaction; + if (S.CheckConstraintSatisfaction(RC, Satisfaction)) + return false; + if (!Satisfaction.IsSatisfied) { + if (Complain) { + if (InOverloadResolution) + S.Diag(FD->getBeginLoc(), + diag::note_ovl_candidate_unsatisfied_constraints); + else + S.Diag(Loc, diag::err_addrof_function_constraints_not_satisfied) + << FD; + S.DiagnoseUnsatisfiedConstraint(Satisfaction); + } + return false; + } + } + auto I = llvm::find_if(FD->parameters(), [](const ParmVarDecl *P) { return P->hasAttr<PassObjectSizeAttr>(); }); @@ -9886,6 +9980,55 @@ void Sema::NoteOverloadCandidate(NamedDecl *Found, FunctionDecl *Fn, MaybeEmitInheritedConstructorNote(*this, Found); } +static void +MaybeDiagnoseAmbiguousConstraints(Sema &S, ArrayRef<OverloadCandidate> Cands) { + // Perhaps the ambiguity was caused by two atomic constraints that are + // 'identical' but not equivalent: + // + // void foo() requires (sizeof(T) > 4) { } // #1 + // void foo() requires (sizeof(T) > 4) && T::value { } // #2 + // + // The 'sizeof(T) > 4' constraints are seemingly equivalent and should cause + // #2 to subsume #1, but these constraints are not considered equivalent + // according to the subsumption rules because they are not the same + // source-level construct. This behavior is quite confusing and we should try + // to help the user figure out what happened. + + SmallVector<const Expr *, 3> FirstAC, SecondAC; + FunctionDecl *FirstCand = nullptr, *SecondCand = nullptr; + for (auto I = Cands.begin(), E = Cands.end(); I != E; ++I) { + if (!I->Function) + continue; + SmallVector<const Expr *, 3> AC; + if (auto *Template = I->Function->getPrimaryTemplate()) + Template->getAssociatedConstraints(AC); + else + I->Function->getAssociatedConstraints(AC); + if (AC.empty()) + continue; + if (FirstCand == nullptr) { + FirstCand = I->Function; + FirstAC = AC; + } else if (SecondCand == nullptr) { + SecondCand = I->Function; + SecondAC = AC; + } else { + // We have more than one pair of constrained functions - this check is + // expensive and we'd rather not try to diagnose it. + return; + } + } + if (!SecondCand) + return; + // The diagnostic can only happen if there are associated constraints on + // both sides (there needs to be some identical atomic constraint). + if (S.MaybeEmitAmbiguousAtomicConstraintsDiagnostic(FirstCand, FirstAC, + SecondCand, SecondAC)) + // Just show the user one diagnostic, they'll probably figure it out + // from here. + return; +} + // Notes the location of all overload candidates designated through // OverloadedExpr void Sema::NoteAllOverloadCandidates(Expr *OverloadedExpr, QualType DestType, @@ -10771,6 +10914,23 @@ static void NoteFunctionCandidate(Sema &S, OverloadCandidate *Cand, case ovl_non_default_multiversion_function: // Do nothing, these should simply be ignored.
break; + + case ovl_fail_constraints_not_satisfied: { + std::string FnDesc; + std::pair FnKindPair = + ClassifyOverloadCandidate(S, Cand->FoundDecl, Fn, + Cand->getRewriteKind(), FnDesc); + + S.Diag(Fn->getLocation(), + diag::note_ovl_candidate_constraints_not_satisfied) + << (unsigned)FnKindPair.first << (unsigned)ocs_non_template + << FnDesc /* Ignored */; + ConstraintSatisfaction Satisfaction; + if (S.CheckConstraintSatisfaction(Fn->getTrailingRequiresClause(), + Satisfaction)) + break; + S.DiagnoseUnsatisfiedConstraint(Satisfaction); + } } } @@ -11156,6 +11316,9 @@ void OverloadCandidateSet::NoteCandidates(PartialDiagnosticAt PD, S.Diag(PD.first, PD.second); NoteCandidates(S, Args, Cands, Opc, OpLoc); + + if (OCD == OCD_AmbiguousCandidates) + MaybeDiagnoseAmbiguousConstraints(S, {begin(), end()}); } void OverloadCandidateSet::NoteCandidates(Sema &S, ArrayRef Args, @@ -11804,15 +11967,33 @@ Sema::ResolveAddressOfOverloadedFunction(Expr *AddressOfExpr, /// resolve that function to a single function that can have its address taken. /// This will modify `Pair` iff it returns non-null. /// -/// This routine can only realistically succeed if all but one candidates in the -/// overload set for SrcExpr cannot have their addresses taken. +/// This routine can only succeed if from all of the candidates in the overload +/// set for SrcExpr that can have their addresses taken, there is one candidate +/// that is more constrained than the rest. FunctionDecl * -Sema::resolveAddressOfOnlyViableOverloadCandidate(Expr *E, - DeclAccessPair &Pair) { +Sema::resolveAddressOfSingleOverloadCandidate(Expr *E, DeclAccessPair &Pair) { OverloadExpr::FindResult R = OverloadExpr::find(E); OverloadExpr *Ovl = R.Expression; + bool IsResultAmbiguous = false; FunctionDecl *Result = nullptr; DeclAccessPair DAP; + SmallVector AmbiguousDecls; + + auto CheckMoreConstrained = + [&] (FunctionDecl *FD1, FunctionDecl *FD2) -> Optional { + SmallVector AC1, AC2; + FD1->getAssociatedConstraints(AC1); + FD2->getAssociatedConstraints(AC2); + bool AtLeastAsConstrained1, AtLeastAsConstrained2; + if (IsAtLeastAsConstrained(FD1, AC1, FD2, AC2, AtLeastAsConstrained1)) + return None; + if (IsAtLeastAsConstrained(FD2, AC2, FD1, AC1, AtLeastAsConstrained2)) + return None; + if (AtLeastAsConstrained1 == AtLeastAsConstrained2) + return None; + return AtLeastAsConstrained1; + }; + // Don't use the AddressOfResolver because we're specifically looking for // cases where we have one overload candidate that lacks // enable_if/pass_object_size/... @@ -11824,32 +12005,54 @@ Sema::resolveAddressOfOnlyViableOverloadCandidate(Expr *E, if (!checkAddressOfFunctionIsAvailable(FD)) continue; - // We have more than one result; quit. - if (Result) - return nullptr; + // We have more than one result - see if it is more constrained than the + // previous one. + if (Result) { + Optional MoreConstrainedThanPrevious = CheckMoreConstrained(FD, + Result); + if (!MoreConstrainedThanPrevious) { + IsResultAmbiguous = true; + AmbiguousDecls.push_back(FD); + continue; + } + if (!*MoreConstrainedThanPrevious) + continue; + // FD is more constrained - replace Result with it. + } + IsResultAmbiguous = false; DAP = I.getPair(); Result = FD; } - if (Result) + if (IsResultAmbiguous) + return nullptr; + + if (Result) { + SmallVector ResultAC; + // We skipped over some ambiguous declarations which might be ambiguous with + // the selected result. 
+ for (FunctionDecl *Skipped : AmbiguousDecls) + if (!CheckMoreConstrained(Skipped, Result).hasValue()) + return nullptr; Pair = DAP; + } return Result; } /// Given an overloaded function, tries to turn it into a non-overloaded -/// function reference using resolveAddressOfOnlyViableOverloadCandidate. This +/// function reference using resolveAddressOfSingleOverloadCandidate. This /// will perform access checks, diagnose the use of the resultant decl, and, if /// requested, potentially perform a function-to-pointer decay. /// -/// Returns false if resolveAddressOfOnlyViableOverloadCandidate fails. +/// Returns false if resolveAddressOfSingleOverloadCandidate fails. /// Otherwise, returns true. This may emit diagnostics and return true. -bool Sema::resolveAndFixAddressOfOnlyViableOverloadCandidate( +bool Sema::resolveAndFixAddressOfSingleOverloadCandidate( ExprResult &SrcExpr, bool DoFunctionPointerConverion) { Expr *E = SrcExpr.get(); assert(E->getType() == Context.OverloadTy && "SrcExpr must be an overload"); DeclAccessPair DAP; - FunctionDecl *Found = resolveAddressOfOnlyViableOverloadCandidate(E, DAP); + FunctionDecl *Found = resolveAddressOfSingleOverloadCandidate(E, DAP); if (!Found || Found->isCPUDispatchMultiVersion() || Found->isCPUSpecificMultiVersion()) return false; diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index ade8a5a6ac148..69aabcd7d6345 100755 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -3750,6 +3750,11 @@ static void checkMoreSpecializedThanPrimary(Sema &S, PartialSpecDecl *Partial) { } S.Diag(Template->getLocation(), diag::note_template_decl_here); + SmallVector PartialAC, TemplateAC; + Template->getAssociatedConstraints(TemplateAC); + Partial->getAssociatedConstraints(PartialAC); + S.MaybeEmitAmbiguousAtomicConstraintsDiagnostic(Partial, PartialAC, Template, + TemplateAC); } static void diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 521160d1ad23e..d267ae8572e44 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -3389,11 +3389,6 @@ Sema::TemplateDeductionResult Sema::FinishTemplateArgumentDeduction( PartialOverloading)) return Result; - if (TemplateDeductionResult Result - = CheckDeducedArgumentConstraints(*this, FunctionTemplate, Builder, - Info)) - return Result; - // C++ [temp.deduct.call]p10: [DR1391] // If deduction succeeds for all parameters that contain // template-parameters that participate in template argument deduction, @@ -3439,6 +3434,23 @@ Sema::TemplateDeductionResult Sema::FinishTemplateArgumentDeduction( return TDK_SubstitutionFailure; } + // C++2a [temp.deduct]p5 + // [...] When all template arguments have been deduced [...] all uses of + // template parameters [...] are replaced with the corresponding deduced + // or default argument values. + // [...] If the function template has associated constraints + // ([temp.constr.decl]), those constraints are checked for satisfaction + // ([temp.constr.constr]). If the constraints are not satisfied, type + // deduction fails. 
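// For example (illustrative only):
//   template<typename T> void f(T) requires (sizeof(T) == 1);
//   f('x');  // OK: T = char is deduced and the constraint holds
//   f(1);    // T = int is deduced first; the satisfaction check then fails,
//            // and the candidate is reported with TDK_ConstraintsNotSatisfied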
+ if (CheckInstantiatedFunctionTemplateConstraints(Info.getLocation(), + Specialization, Builder, Info.AssociatedConstraintsSatisfaction)) + return TDK_MiscellaneousDeductionFailure; + + if (!Info.AssociatedConstraintsSatisfaction.IsSatisfied) { + Info.reset(TemplateArgumentList::CreateCopy(Context, Builder)); + return TDK_ConstraintsNotSatisfied; + } + if (OriginalCallArgs) { // C++ [temp.deduct.call]p4: // In general, the deduction process attempts to find template argument @@ -3559,7 +3571,7 @@ ResolveOverloadForDeduction(Sema &S, TemplateParameterList *TemplateParams, DeclAccessPair DAP; if (FunctionDecl *Viable = - S.resolveAddressOfOnlyViableOverloadCandidate(Arg, DAP)) + S.resolveAddressOfSingleOverloadCandidate(Arg, DAP)) return GetTypeOfFunction(S, R, Viable); return {}; diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 90b85db6d0883..a9018cfeccbc1 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -150,7 +150,7 @@ Sema::getTemplateInstantiationArgs(NamedDecl *D, break; // If this function is a generic lambda specialization, we are done. - if (isGenericLambdaCallOperatorSpecialization(Function)) + if (isGenericLambdaCallOperatorOrStaticInvokerSpecialization(Function)) break; } else if (FunctionTemplateDecl *FunTmpl diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 4d248f4a292cb..8d55a7e309ef3 100755 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -656,11 +656,10 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, LateAttrs->push_back(LateInstantiatedAttribute(TmplAttr, Saved, New)); } else { // Allow 'this' within late-parsed attributes. - NamedDecl *ND = dyn_cast(New); - CXXRecordDecl *ThisContext = - dyn_cast_or_null(ND->getDeclContext()); + auto *ND = cast(New); + auto *ThisContext = dyn_cast_or_null(ND->getDeclContext()); CXXThisScopeRAII ThisScope(*this, ThisContext, Qualifiers(), - ND && ND->isCXXInstanceMember()); + ND->isCXXInstanceMember()); Attr *NewAttr = sema::instantiateTemplateAttribute(TmplAttr, Context, *this, TemplateArgs); @@ -1847,6 +1846,18 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl( return nullptr; } + // FIXME: Concepts: Do not substitute into constraint expressions + Expr *TrailingRequiresClause = D->getTrailingRequiresClause(); + if (TrailingRequiresClause) { + ExprResult SubstRC = SemaRef.SubstExpr(TrailingRequiresClause, + TemplateArgs); + if (SubstRC.isInvalid()) + return nullptr; + TrailingRequiresClause = SubstRC.get(); + if (!SemaRef.CheckConstraintExpression(TrailingRequiresClause)) + return nullptr; + } + // If we're instantiating a local function declaration, put the result // in the enclosing namespace; otherwise we need to find the instantiated // context. 
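// Sketch of what the substitution above covers (hypothetical usage):
//   template<typename T> struct S { void f() requires (sizeof(T) > 1); };
// Instantiating a member of S<int> substitutes T = int into the clause before
// it is checked; the FIXME records that C++2a ultimately wants satisfaction
// checked against the unsubstituted expression instead.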
@@ -1883,7 +1894,8 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl( Function = FunctionDecl::Create( SemaRef.Context, DC, D->getInnerLocStart(), NameInfo, T, TInfo, D->getCanonicalDecl()->getStorageClass(), D->isInlineSpecified(), - D->hasWrittenPrototype(), D->getConstexprKind()); + D->hasWrittenPrototype(), D->getConstexprKind(), + TrailingRequiresClause); Function->setRangeEnd(D->getSourceRange().getEnd()); } @@ -1910,6 +1922,9 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl( Params[P]->setOwningFunction(Function); Function->setParams(Params); + if (TrailingRequiresClause) + Function->setTrailingRequiresClause(TrailingRequiresClause); + if (TemplateParams) { // Our resulting instantiation is actually a function template, since we // are substituting only the outer template parameters. For example, given @@ -2169,6 +2184,18 @@ Decl *TemplateDeclInstantiator::VisitCXXMethodDecl( return nullptr; } + // FIXME: Concepts: Do not substitute into constraint expressions + Expr *TrailingRequiresClause = D->getTrailingRequiresClause(); + if (TrailingRequiresClause) { + ExprResult SubstRC = SemaRef.SubstExpr(TrailingRequiresClause, + TemplateArgs); + if (SubstRC.isInvalid()) + return nullptr; + TrailingRequiresClause = SubstRC.get(); + if (!SemaRef.CheckConstraintExpression(TrailingRequiresClause)) + return nullptr; + } + DeclContext *DC = Owner; if (isFriend) { if (QualifierLoc) { @@ -2201,23 +2228,27 @@ Decl *TemplateDeclInstantiator::VisitCXXMethodDecl( Method = CXXConstructorDecl::Create( SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo, InstantiatedExplicitSpecifier, Constructor->isInlineSpecified(), false, - Constructor->getConstexprKind()); + Constructor->getConstexprKind(), InheritedConstructor(), + TrailingRequiresClause); Method->setRangeEnd(Constructor->getEndLoc()); } else if (CXXDestructorDecl *Destructor = dyn_cast(D)) { Method = CXXDestructorDecl::Create( SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo, - Destructor->isInlineSpecified(), false, Destructor->getConstexprKind()); + Destructor->isInlineSpecified(), false, Destructor->getConstexprKind(), + TrailingRequiresClause); Method->setRangeEnd(Destructor->getEndLoc()); } else if (CXXConversionDecl *Conversion = dyn_cast(D)) { Method = CXXConversionDecl::Create( SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo, Conversion->isInlineSpecified(), InstantiatedExplicitSpecifier, - Conversion->getConstexprKind(), Conversion->getEndLoc()); + Conversion->getConstexprKind(), Conversion->getEndLoc(), + TrailingRequiresClause); } else { StorageClass SC = D->isStatic() ? SC_Static : SC_None; Method = CXXMethodDecl::Create(SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo, SC, D->isInlineSpecified(), - D->getConstexprKind(), D->getEndLoc()); + D->getConstexprKind(), D->getEndLoc(), + TrailingRequiresClause); } if (D->isInlined()) @@ -4119,6 +4150,48 @@ void Sema::InstantiateExceptionSpec(SourceLocation PointOfInstantiation, TemplateArgs); } +bool Sema::CheckInstantiatedFunctionTemplateConstraints( + SourceLocation PointOfInstantiation, FunctionDecl *Decl, + ArrayRef TemplateArgs, + ConstraintSatisfaction &Satisfaction) { + // In most cases we're not going to have constraints, so check for that first. + FunctionTemplateDecl *Template = Decl->getPrimaryTemplate(); + // Note - code synthesis context for the constraints check is created + // inside CheckConstraintsSatisfaction. 
+ SmallVector TemplateAC; + Template->getAssociatedConstraints(TemplateAC); + if (TemplateAC.empty()) { + Satisfaction.IsSatisfied = true; + return false; + } + + // Enter the scope of this instantiation. We don't use + // PushDeclContext because we don't have a scope. + Sema::ContextRAII savedContext(*this, Decl); + LocalInstantiationScope Scope(*this); + + MultiLevelTemplateArgumentList MLTAL = + getTemplateInstantiationArgs(Decl, nullptr, /*RelativeToPrimary*/true); + + // If this is not an explicit specialization - we need to get the instantiated + // version of the template arguments and add them to scope for the + // substitution. + if (Decl->isTemplateInstantiation()) { + InstantiatingTemplate Inst(*this, Decl->getPointOfInstantiation(), + InstantiatingTemplate::ConstraintsCheck{}, Decl->getPrimaryTemplate(), + MLTAL.getInnermost(), SourceRange()); + if (Inst.isInvalid()) + return true; + if (addInstantiatedParametersToScope(*this, Decl, + Decl->getTemplateInstantiationPattern(), + Scope, MLTAL)) + return true; + } + + return CheckConstraintSatisfaction(Template, TemplateAC, TemplateArgs, + PointOfInstantiation, Satisfaction); +} + /// Initializes the common fields of an instantiation function /// declaration (New) from the corresponding fields of its template (Tmpl). /// diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp index 975d6620c06f8..d947d6d282be0 100644 --- a/clang/lib/Sema/SemaTemplateVariadic.cpp +++ b/clang/lib/Sema/SemaTemplateVariadic.cpp @@ -937,6 +937,10 @@ bool Sema::containsUnexpandedParameterPacks(Declarator &D) { } } + if (Expr *TRC = D.getTrailingRequiresClause()) + if (TRC->containsUnexpandedParameterPack()) + return true; + return false; } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index e99af17a400ae..6107892b3769c 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -11559,6 +11559,13 @@ TreeTransform::TransformLambdaExpr(LambdaExpr *E) { NewCallOpType); } + // Transform the trailing requires clause + ExprResult NewTrailingRequiresClause; + if (Expr *TRC = E->getCallOperator()->getTrailingRequiresClause()) + // FIXME: Concepts: Substitution into requires clause should only happen + // when checking satisfaction. + NewTrailingRequiresClause = getDerived().TransformExpr(TRC); + // Create the local class that will describe the lambda. 
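// E.g. (illustrative): when a lambda such as
//   [](auto x) requires (sizeof(decltype(x)) == 1) { return x; }
// is rebuilt during template instantiation, the transformed clause computed
// above is handed to the new call operator created below.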
CXXRecordDecl *OldClass = E->getLambdaClass(); CXXRecordDecl *Class @@ -11579,7 +11586,8 @@ TreeTransform::TransformLambdaExpr(LambdaExpr *E) { Class, E->getIntroducerRange(), NewCallOpTSI, E->getCallOperator()->getEndLoc(), NewCallOpTSI->getTypeLoc().castAs().getParams(), - E->getCallOperator()->getConstexprKind()); + E->getCallOperator()->getConstexprKind(), + NewTrailingRequiresClause.get()); LSI->CallOperator = NewCallOperator; diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 3351f76151e35..a132164d30e75 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -822,6 +822,7 @@ void ASTDeclReader::VisitDeclaratorDecl(DeclaratorDecl *DD) { if (Record.readInt()) { // hasExtInfo auto *Info = new (Reader.getContext()) DeclaratorDecl::ExtInfo(); Record.readQualifierInfo(*Info); + Info->TrailingRequiresClause = Record.readExpr(); DD->DeclInfo = Info; } QualType TSIType = Record.readType(); diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 66f4db855a3e9..a553936570b59 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -523,8 +523,11 @@ void ASTDeclWriter::VisitDeclaratorDecl(DeclaratorDecl *D) { VisitValueDecl(D); Record.AddSourceLocation(D->getInnerLocStart()); Record.push_back(D->hasExtInfo()); - if (D->hasExtInfo()) - Record.AddQualifierInfo(*D->getExtInfo()); + if (D->hasExtInfo()) { + DeclaratorDecl::ExtInfo *Info = D->getExtInfo(); + Record.AddQualifierInfo(*Info); + Record.AddStmt(Info->TrailingRequiresClause); + } // The location information is deferred until the end of the record. Record.AddTypeRef(D->getTypeSourceInfo() ? D->getTypeSourceInfo()->getType() : QualType()); diff --git a/clang/test/Analysis/bstring.c b/clang/test/Analysis/bstring.c index 2d53402a9ad36..214f6537e10ed 100644 --- a/clang/test/Analysis/bstring.c +++ b/clang/test/Analysis/bstring.c @@ -222,6 +222,9 @@ void mempcpy2 () { char dst[1]; mempcpy(dst, src, 4); // expected-warning{{Memory copy function overflows destination buffer}} +#ifndef VARIANT +// expected-warning@-2{{'mempcpy' will always overflow; destination buffer has size 1, but size argument is 4}} +#endif } void mempcpy3 () { @@ -243,6 +246,9 @@ void mempcpy5() { char dst[3]; mempcpy(dst+2, src+2, 2); // expected-warning{{Memory copy function overflows destination buffer}} +#ifndef VARIANT +// expected-warning@-2{{'mempcpy' will always overflow; destination buffer has size 1, but size argument is 2}} +#endif } void mempcpy6() { diff --git a/clang/test/CXX/class.derived/class.virtual/p6.cpp b/clang/test/CXX/class.derived/class.virtual/p6.cpp new file mode 100644 index 0000000000000..63a4313de5541 --- /dev/null +++ b/clang/test/CXX/class.derived/class.virtual/p6.cpp @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -std=c++2a -fconcepts-ts -verify %s + +template +class A { + virtual void f1() requires (sizeof(T) == 0); + // expected-error@-1{{virtual function cannot have a requires clause}} + virtual void f2() requires (sizeof(T) == 1); + // expected-error@-1{{virtual function cannot have a requires clause}} +}; + +template +class B : A { + virtual void f1() requires (sizeof(T) == 0) override {} + // expected-error@-1{{virtual function cannot have a requires clause}} +}; + +template struct C : T {void f() requires true; }; +// expected-error@-1{{virtual function cannot have a requires clause}} +struct D { virtual void f(); }; +template struct C; +// 
expected-note@-1{{in instantiation of template class 'C' requested here}} \ No newline at end of file diff --git a/clang/test/CXX/dcl/dcl.decl/p3.cpp b/clang/test/CXX/dcl/dcl.decl/p3.cpp new file mode 100644 index 0000000000000..eec0aa2043a2e --- /dev/null +++ b/clang/test/CXX/dcl/dcl.decl/p3.cpp @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -std=c++2a -fconcepts-ts -verify %s + +template +constexpr bool is_same_v = false; + +template +constexpr bool is_same_v = true; + +void f1(int a) requires true; // OK +auto f2(int a) -> bool requires true; // OK +auto f3(int a) -> bool (*)(int b) requires true; // OK +auto f4(int a) requires true -> bool; // expected-error{{trailing return type must appear before trailing requires clause}} +int f5(int a) requires; // expected-error{{expected expression}} +int f6(int a) requires {} // expected-error{{expected expression}} +void (f7()) requires true; +void (f8() requires true); // expected-error{{trailing requires clause should be placed outside parentheses}} +void (*(f9 requires (true)))(); // expected-error{{trailing requires clause should be placed outside parentheses}} +static_assert(is_same_v); +void (*pf)() requires true; // expected-error{{trailing requires clause can only be used when declaring a function}} +void g1(int (*dsdads)() requires false); // expected-error{{trailing requires clause can only be used when declaring a function}} +void g2(int (*(*dsdads)())() requires true); // expected-error{{trailing requires clause can only be used when declaring a function}} +void g3(int (*(*dsdads)(int) requires true)() ); // expected-error{{trailing requires clause should be placed outside parentheses}} +using T = void (); +T x requires true; +struct S { + T m1 requires true, m2 requires true; +}; + +template +struct R { + R(T t); +}; + +template +R(T) -> R requires true; // expected-error{{deduction guide cannot have a requires clause}} diff --git a/clang/test/CXX/expr/expr.prim/expr.prim.id/mixed-constraints.cpp b/clang/test/CXX/expr/expr.prim/expr.prim.id/mixed-constraints.cpp new file mode 100644 index 0000000000000..fafb3f7b35d9f --- /dev/null +++ b/clang/test/CXX/expr/expr.prim/expr.prim.id/mixed-constraints.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -std=c++2a -fconcepts-ts -verify %s + +template requires (sizeof(T) >= 4 && sizeof(T) <= 10) +// expected-note@-1{{because 'sizeof(char [20]) <= 10' (20 <= 10) evaluated to false}} +// expected-note@-2{{because 'sizeof(char) >= 4' (1 >= 4) evaluated to false}} +void foo() requires (sizeof(T) <= 8) {} +// expected-note@-1{{candidate template ignored: constraints not satisfied [with T = char]}} +// expected-note@-2{{candidate template ignored: constraints not satisfied [with T = char [9]]}} +// expected-note@-3{{candidate template ignored: constraints not satisfied [with T = char [20]]}} +// expected-note@-4{{because 'sizeof(char [9]) <= 8' (9 <= 8) evaluated to false}} + +void bar() { + foo(); // expected-error{{no matching function for call to 'foo'}} + foo(); + foo(); + foo(); // expected-error{{no matching function for call to 'foo'}} + foo(); // expected-error{{no matching function for call to 'foo'}} +} \ No newline at end of file diff --git a/clang/test/CXX/expr/expr.prim/expr.prim.id/p4.cpp b/clang/test/CXX/expr/expr.prim/expr.prim.id/p4.cpp new file mode 100644 index 0000000000000..f13ab279da33a --- /dev/null +++ b/clang/test/CXX/expr/expr.prim/expr.prim.id/p4.cpp @@ -0,0 +1,58 @@ +// RUN: %clang_cc1 -std=c++2a -fconcepts-ts -verify %s + +namespace functions +{ + void foo(int) requires false {} + // 
expected-note@-1 3{{because 'false' evaluated to false}} + // expected-note@-2 {{candidate function not viable: constraints not satisfied}} + void bar(int) requires true {} + + void a(int); + void a(double); + + void baz() { + foo(1); // expected-error{{no matching function for call to 'foo'}} + bar(1); + void (*p1)(int) = foo; // expected-error{{invalid reference to function 'foo': constraints not satisfied}} + void (*p3)(int) = bar; + decltype(foo)* a1 = nullptr; // expected-error{{invalid reference to function 'foo': constraints not satisfied}} + decltype(bar)* a2 = nullptr; + } +} + +namespace methods +{ + template + struct A { + static void foo(int) requires (sizeof(T) == 1) {} // expected-note 3{{because 'sizeof(char [2]) == 1' (2 == 1) evaluated to false}} + static void bar(int) requires (sizeof(T) == 2) {} // expected-note 3{{because 'sizeof(char) == 2' (1 == 2) evaluated to false}} + }; + + void baz() { + A::foo(1); + A::bar(1); // expected-error{{invalid reference to function 'bar': constraints not satisfied}} + A::foo(1); // expected-error{{invalid reference to function 'foo': constraints not satisfied}} + A::bar(1); + void (*p1)(int) = A::foo; + void (*p2)(int) = A::bar; // expected-error{{invalid reference to function 'bar': constraints not satisfied}} + void (*p3)(int) = A::foo; // expected-error{{invalid reference to function 'foo': constraints not satisfied}} + void (*p4)(int) = A::bar; + decltype(A::foo)* a1 = nullptr; + decltype(A::bar)* a2 = nullptr; // expected-error{{invalid reference to function 'bar': constraints not satisfied}} + decltype(A::foo)* a3 = nullptr; // expected-error{{invalid reference to function 'foo': constraints not satisfied}} + decltype(A::bar)* a4 = nullptr; + } +} + +namespace operators +{ + template + struct A { + A operator-(A b) requires (sizeof(T) == 1) { return b; } // expected-note{{because 'sizeof(int) == 1' (4 == 1) evaluated to false}} + }; + + void baz() { + auto* x = &A::operator-; // expected-error{{invalid reference to function 'operator-': constraints not satisfied}} + auto y = &A::operator-; + } +} \ No newline at end of file diff --git a/clang/test/CXX/expr/expr.prim/expr.prim.lambda/expr.prim.lambda.closure/p3.cpp b/clang/test/CXX/expr/expr.prim/expr.prim.lambda/expr.prim.lambda.closure/p3.cpp new file mode 100644 index 0000000000000..942280e1059fb --- /dev/null +++ b/clang/test/CXX/expr/expr.prim/expr.prim.lambda/expr.prim.lambda.closure/p3.cpp @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -std=c++2a -fconcepts-ts -verify %s + +auto l1 = [] (auto x) requires (sizeof(decltype(x)) == 1) { return x; }; +// expected-note@-1{{candidate template ignored: constraints not satisfied [with $0 = int]}} +// expected-note@-2{{because 'sizeof(decltype(x)) == 1' (4 == 1) evaluated to false}} + +auto l1t1 = l1('a'); +auto l1t2 = l1(1); +// expected-error@-1{{no matching function for call to object of type '(lambda at}} + +auto l2 = [] (auto... x) requires ((sizeof(decltype(x)) >= 2) && ...) 
{ return (x + ...); }; +// expected-note@-1{{candidate template ignored: constraints not satisfied [with $0 = ]}} +// expected-note@-2{{candidate template ignored: constraints not satisfied [with $0 = ]}} +// expected-note@-3 2{{because 'sizeof(decltype(x)) >= 2' (1 >= 2) evaluated to false}} + +auto l2t1 = l2('a'); +// expected-error@-1{{no matching function for call to object of type '(lambda at}} +auto l2t2 = l2(1, 'a'); +// expected-error@-1{{no matching function for call to object of type '(lambda at}} +auto l2t3 = l2((short)1, (short)1); \ No newline at end of file diff --git a/clang/test/CXX/over/over.match/over.match.best/p1-2a.cpp b/clang/test/CXX/over/over.match/over.match.best/p1-2a.cpp new file mode 100644 index 0000000000000..36c68071448c7 --- /dev/null +++ b/clang/test/CXX/over/over.match/over.match.best/p1-2a.cpp @@ -0,0 +1,113 @@ +// RUN: %clang_cc1 -std=c++2a -fconcepts-ts -verify %s + +template +constexpr static bool is_same_v = false; + +template +constexpr static bool is_same_v = true; + +namespace templates +{ + template + concept AtLeast1 = sizeof(T) >= 1; + + template + int foo(T t) requires (sizeof(T) == 4) { // expected-note {{candidate function}} + return 0; + } + + template + char foo(T t) requires AtLeast1 { // expected-note {{candidate function}} + return 'a'; + } + + template + double foo(T t) requires (AtLeast1 && sizeof(T) <= 2) { + return 'a'; + } + + static_assert(is_same_v); // expected-error {{call to 'foo' is ambiguous}} + static_assert(is_same_v); + + template + void bar() requires (sizeof(T) == 1) { } + // expected-note@-1{{similar constraint expressions not considered equivalent}} + // expected-note@-2{{candidate function [with T = char]}} + + template + void bar() requires (sizeof(T) == 1 && sizeof(T) >= 0) { } + // expected-note@-1{{candidate function [with T = char]}} + // expected-note@-2{{similar constraint expression here}} + + static_assert(is_same_v()), void>); + // expected-error@-1{{call to 'bar' is ambiguous}} + + template + constexpr int baz() requires AtLeast1 { // expected-note {{candidate function}} + return 1; + } + + template requires AtLeast1 + constexpr int baz() { // expected-note {{candidate function [with T = int]}} + return 2; + } + + static_assert(baz() == 1); // expected-error {{call to 'baz' is ambiguous}} +} + +namespace non_template +{ + template + concept AtLeast2 = sizeof(T) >= 2; + + template + concept AtMost8 = sizeof(T) <= 8; + + int foo() requires AtLeast2 && AtMost8 { + return 0; + } + + double foo() requires AtLeast2 { + return 0.0; + } + + double baz() requires AtLeast2 && AtMost8 { // expected-note {{candidate function}} + return 0.0; + } + + int baz() requires AtMost8 && AtLeast2 { // expected-note {{candidate function}} + return 0.0; + } + + void bar() requires (sizeof(char[8]) >= 8) { } + // expected-note@-1 {{candidate function}} + // expected-note@-2 {{similar constraint expressions not considered equivalent}} + + void bar() requires (sizeof(char[8]) >= 8 && sizeof(int) <= 30) { } + // expected-note@-1 {{candidate function}} + // expected-note@-2 {{similar constraint expression here}} + + static_assert(is_same_v); + static_assert(is_same_v); // expected-error {{call to 'baz' is ambiguous}} + static_assert(is_same_v); // expected-error {{call to 'bar' is ambiguous}} + + constexpr int goo(int a) requires AtLeast2 && true { + return 1; + } + + constexpr int goo(const int b) requires AtLeast2 { + return 2; + } + + // Only trailing requires clauses of redeclarations are compared for overload resolution. 
+ constexpr int doo(int a, ...) requires AtLeast2 && true { // expected-note {{candidate function}} + return 1; + } + + constexpr int doo(int b) requires AtLeast2 { // expected-note {{candidate function}} + return 2; + } + + static_assert(goo(1) == 1); + static_assert(doo(2) == 1); // expected-error {{call to 'doo' is ambiguous}} +} diff --git a/clang/test/CXX/over/over.match/over.match.viable/p3.cpp b/clang/test/CXX/over/over.match/over.match.viable/p3.cpp new file mode 100644 index 0000000000000..ef752d76ec23d --- /dev/null +++ b/clang/test/CXX/over/over.match/over.match.viable/p3.cpp @@ -0,0 +1,63 @@ +// RUN: %clang_cc1 -std=c++2a -fconcepts-ts -verify %s + +struct S2 {}; +// expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'S1' to 'const S2' for 1st argument}} +// expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'S1' to 'S2' for 1st argument}} +// expected-note@-3 {{candidate constructor (the implicit default constructor) not viable: requires 0 arguments, but 1 was provided}} + +struct S1 { + void foo() const requires true {} + void foo() const requires false {} + void bar() const requires false {} + // expected-note@-1 {{because 'false' evaluated to false}} + operator bool() const requires true { return true; } + explicit operator bool() const requires false; + explicit operator S2() const requires false; + // expected-note@-1 {{candidate function not viable: constraints not satisfied}} + // expected-note@-2 {{because 'false' evaluated to false}} +}; + +void foo() { + S1().foo(); + S1().bar(); + // expected-error@-1 {{invalid reference to function 'bar': constraints not satisfied}} + (void) static_cast(S1()); + (void) static_cast(S1()); + // expected-error@-1 {{no matching conversion for static_cast from 'S1' to 'S2'}} +} + +// Test that constraints are checked before implicit conversions are formed. + +template +struct invalid_template { using X = typename T::non_existant; }; +struct A { + template::aadasas> + operator T() {} +}; + +void foo(int) requires false; +void foo(A) requires true; + +struct S { + void foo(int) requires false; + void foo(A) requires true; + S(A) requires false; + S(double) requires true; + ~S() requires false; + // expected-note@-1 2{{because 'false' evaluated to false}} + ~S() requires true; + operator int() requires true; + operator int() requires false; +}; + +void bar() { + foo(A{}); + S{1.}.foo(A{}); + // expected-error@-1{{invalid reference to function '~S': constraints not satisfied}} + // Note - this behavior w.r.t. constrained dtors is a consequence of current + // wording, which does not invoke overload resolution when a dtor is called. + // P0848 is set to address this issue. 
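Reviewer aside, not part of the test: because overload resolution is not performed for destructor calls, the constraint on '~S' is checked only when a destructor invocation is actually formed; merely naming the type is fine. A hypothetical snippet that would still compile against the declarations above:

void no_destruction() {
  S *p = nullptr; // no S object is created or destroyed here, so the
  (void)p;        // unsatisfied constraint on '~S' is never checked
}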
+ S s = 1; + // expected-error@-1{{invalid reference to function '~S': constraints not satisfied}} + int a = s; +} \ No newline at end of file diff --git a/clang/test/CXX/over/over.over/p4-2a.cpp b/clang/test/CXX/over/over.over/p4-2a.cpp new file mode 100644 index 0000000000000..a5d7a110992cf --- /dev/null +++ b/clang/test/CXX/over/over.over/p4-2a.cpp @@ -0,0 +1,61 @@ +// RUN: %clang_cc1 -std=c++2a -fconcepts-ts -verify %s + +template +constexpr static bool is_same_v = false; + +template +constexpr static bool is_same_v = true; + +template +concept AtLeast2 = sizeof(T) >= 2; + +template +concept AtMost8 = sizeof(T) <= 8; + +int foo() requires AtLeast2 && AtMost8 { + return 0; +} + +double foo() requires AtLeast2 { + return 0.0; +} + +char bar() requires AtLeast2 { // expected-note {{possible target for call}} + return 1.0; +} + +short bar() requires AtLeast2 && AtMost8 { +// expected-note@-1{{possible target for call}} +// expected-note@-2{{candidate function}} + return 0.0; +} + +int bar() requires AtMost8 && AtLeast2 { +// expected-note@-1{{possible target for call}} +// expected-note@-2{{candidate function}} + return 0.0; +} + +char baz() requires AtLeast2 { + return 1.0; +} + +short baz() requires AtLeast2 && AtMost8 { + return 0.0; +} + +int baz() requires AtMost8 && AtLeast2 { + return 0.0; +} + +long baz() requires AtMost8 && AtLeast2 && AtLeast2 { + return 3.0; +} + +void a() { + static_assert(is_same_v); + static_assert(is_same_v); + // expected-error@-1{{reference to overloaded function could not be resolved; did you mean to call it with no arguments?}} + // expected-error@-2{{call to 'bar' is ambiguous}} + static_assert(is_same_v); +} \ No newline at end of file diff --git a/clang/test/CXX/temp/temp.constr/temp.constr.constr/function-templates.cpp b/clang/test/CXX/temp/temp.constr/temp.constr.constr/function-templates.cpp index c1a3a27fbeacc..99de7261a81c8 100644 --- a/clang/test/CXX/temp/temp.constr/temp.constr.constr/function-templates.cpp +++ b/clang/test/CXX/temp/temp.constr/temp.constr.constr/function-templates.cpp @@ -23,14 +23,13 @@ static_assert(is_same_v(nullptr)), int>); static_assert(is_same_v); // expected-error {{no matching function for call to 'dereference'}} static_assert(is_same_v('a')), char>); // expected-error {{no matching function for call to 'dereference'}} - -template requires T{} + T{} // expected-note {{because substituted constraint expression is ill-formed: invalid operands to binary expression ('A' and 'A')}} +template requires (T{} + T{}) // expected-note {{because substituted constraint expression is ill-formed: invalid operands to binary expression ('A' and 'A')}} auto foo(T t) { // expected-note {{candidate template ignored: constraints not satisfied [with T = A]}} return t + t; } -template requires !((T{} - T{}) && (T{} + T{})) || false +template requires (!((T{} - T{}) && (T{} + T{})) || false) // expected-note@-1{{because substituted constraint expression is ill-formed: invalid operands to binary expression ('A' and 'A')}} // expected-note@-2{{and 'false' evaluated to false}} auto bar(T t) { // expected-note {{candidate template ignored: constraints not satisfied [with T = A]}} diff --git a/clang/test/CXX/temp/temp.constr/temp.constr.constr/non-function-templates.cpp b/clang/test/CXX/temp/temp.constr/temp.constr.constr/non-function-templates.cpp index 24caa5063a1b4..a25b22a9a1544 100644 --- a/clang/test/CXX/temp/temp.constr/temp.constr.constr/non-function-templates.cpp +++ 
b/clang/test/CXX/temp/temp.constr/temp.constr.constr/non-function-templates.cpp @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -std=c++2a -fconcepts-ts -x c++ -verify %s -template requires sizeof(T) >= 2 // expected-note{{because 'sizeof(char) >= 2' (1 >= 2) evaluated to false}} +template requires (sizeof(T) >= 2) // expected-note{{because 'sizeof(char) >= 2' (1 >= 2) evaluated to false}} struct A { static constexpr int value = sizeof(T); }; @@ -9,8 +9,8 @@ static_assert(A::value == 4); static_assert(A::value == 1); // expected-error{{constraints not satisfied for class template 'A' [with T = char]}} template - requires sizeof(T) != sizeof(U) // expected-note{{because 'sizeof(int) != sizeof(char [4])' (4 != 4) evaluated to false}} - && sizeof(T) >= 4 // expected-note{{because 'sizeof(char) >= 4' (1 >= 4) evaluated to false}} + requires (sizeof(T) != sizeof(U) // expected-note{{because 'sizeof(int) != sizeof(char [4])' (4 != 4) evaluated to false}} + && sizeof(T) >= 4) // expected-note{{because 'sizeof(char) >= 4' (1 >= 4) evaluated to false}} constexpr int SizeDiff = sizeof(T) > sizeof(U) ? sizeof(T) - sizeof(U) : sizeof(U) - sizeof(T); static_assert(SizeDiff == 3); @@ -44,16 +44,16 @@ static_assert(S::value); template struct AA { - template requires sizeof(U) == sizeof(T) // expected-note{{because 'sizeof(int [2]) == sizeof(int)' (8 == 4) evaluated to false}} + template requires (sizeof(U) == sizeof(T)) // expected-note{{because 'sizeof(int [2]) == sizeof(int)' (8 == 4) evaluated to false}} struct B { static constexpr int a = 0; }; - template requires sizeof(U) == sizeof(T) // expected-note{{because 'sizeof(int [2]) == sizeof(int)' (8 == 4) evaluated to false}} + template requires (sizeof(U) == sizeof(T)) // expected-note{{because 'sizeof(int [2]) == sizeof(int)' (8 == 4) evaluated to false}} static constexpr int b = 1; - template requires sizeof(U) == sizeof(T) // expected-note{{because 'sizeof(int [2]) == sizeof(int)' (8 == 4) evaluated to false}} + template requires (sizeof(U) == sizeof(T)) // expected-note{{because 'sizeof(int [2]) == sizeof(int)' (8 == 4) evaluated to false}} static constexpr int getB() { // expected-note{{candidate template ignored: constraints not satisfied [with U = int [2]]}} return 2; } @@ -85,8 +85,8 @@ template requires B::type // expected-note{{in instantiation of t // expected-note@-1{{while substituting template arguments into constraint expression here}} struct C { }; -template requires T{} // expected-error{{atomic constraint must be of type 'bool' (found 'int')}} +template requires (T{}) // expected-error{{atomic constraint must be of type 'bool' (found 'int')}} struct D { }; static_assert(C{}); // expected-note{{while checking constraint satisfaction for template 'C' required here}} -static_assert(D{}); // expected-note{{while checking constraint satisfaction for template 'D' required here}} \ No newline at end of file +static_assert(D{}); // expected-note{{while checking constraint satisfaction for template 'D' required here}} diff --git a/clang/test/CXX/temp/temp.constr/temp.constr.constr/partial-specializations.cpp b/clang/test/CXX/temp/temp.constr/temp.constr.constr/partial-specializations.cpp index 47bd2a5507690..1ea4da29ee9f5 100644 --- a/clang/test/CXX/temp/temp.constr/temp.constr.constr/partial-specializations.cpp +++ b/clang/test/CXX/temp/temp.constr/temp.constr.constr/partial-specializations.cpp @@ -2,10 +2,10 @@ namespace class_templates { - template requires sizeof(T) >= 4 // expected-note {{because 'sizeof(char) >= 4' (1 >= 4) evaluated to false}} + 
template requires (sizeof(T) >= 4) // expected-note {{because 'sizeof(char) >= 4' (1 >= 4) evaluated to false}} struct is_same { static constexpr bool value = false; }; - template requires sizeof(T*) >= 4 && sizeof(T) >= 4 + template requires (sizeof(T*) >= 4 && sizeof(T) >= 4) struct is_same { static constexpr bool value = true; }; static_assert(!is_same::value); @@ -23,7 +23,7 @@ namespace class_templates // expected-note@-1{{while substituting template arguments into constraint expression here}} struct B {}; - template requires T{} // expected-error{{atomic constraint must be of type 'bool' (found 'int')}} + template requires (T{}) // expected-error{{atomic constraint must be of type 'bool' (found 'int')}} struct B {}; static_assert((B{}, true)); // expected-note{{while checking constraint satisfaction for class template partial specialization 'B' required here}} @@ -35,10 +35,10 @@ namespace class_templates namespace variable_templates { - template requires sizeof(T) >= 4 + template requires (sizeof(T) >= 4) constexpr bool is_same_v = false; - template requires sizeof(T*) >= 4 && sizeof(T) >= 4 + template requires (sizeof(T*) >= 4 && sizeof(T) >= 4) constexpr bool is_same_v = true; static_assert(!is_same_v); @@ -55,7 +55,7 @@ namespace variable_templates // expected-note@-1{{while substituting template arguments into constraint expression here}} constexpr bool v1 = true; - template requires T{} // expected-error{{atomic constraint must be of type 'bool' (found 'int')}} + template requires (T{}) // expected-error{{atomic constraint must be of type 'bool' (found 'int')}} constexpr bool v1 = true; static_assert(v1); // expected-note{{while checking constraint satisfaction for variable template partial specialization 'v1' required here}} diff --git a/clang/test/CXX/temp/temp.constr/temp.constr.decl/class-template-decl.cpp b/clang/test/CXX/temp/temp.constr/temp.constr.decl/class-template-decl.cpp index 5d5361f9c20c3..6f7b80e26a66a 100644 --- a/clang/test/CXX/temp/temp.constr/temp.constr.decl/class-template-decl.cpp +++ b/clang/test/CXX/temp/temp.constr/temp.constr.decl/class-template-decl.cpp @@ -2,9 +2,9 @@ namespace nodiag { -template requires bool(T()) +template requires (bool(T())) struct A; -template requires bool(U()) +template requires (bool(U())) struct A; } // end namespace nodiag @@ -21,7 +21,7 @@ struct B; template requires true // expected-note{{previous template declaration is here}} struct C; -template requires !0 // expected-error{{requires clause differs in template redeclaration}} +template requires (!0) // expected-error{{requires clause differs in template redeclaration}} struct C; } // end namespace diag @@ -29,15 +29,15 @@ struct C; namespace nodiag { struct AA { - template requires someFunc(T()) + template requires (someFunc(T())) struct A; }; -template requires someFunc(U()) +template requires (someFunc(U())) struct AA::A { }; struct AAF { - template requires someFunc(T()) + template requires (someFunc(T())) friend struct AA::A; }; diff --git a/clang/test/CXX/temp/temp.constr/temp.constr.decl/func-template-decl.cpp b/clang/test/CXX/temp/temp.constr/temp.constr.decl/func-template-decl.cpp index c83ab26059d7c..30fbec64eea78 100644 --- a/clang/test/CXX/temp/temp.constr/temp.constr.decl/func-template-decl.cpp +++ b/clang/test/CXX/temp/temp.constr/temp.constr.decl/func-template-decl.cpp @@ -2,9 +2,9 @@ namespace nodiag { -template requires bool(T()) +template requires (bool(T())) int A(); -template requires bool(U()) +template requires (bool(U())) int A(); } // end 
namespace nodiag @@ -26,7 +26,7 @@ int orig::A(); template requires true int orig::B(); // expected-error@-1{{out-of-line declaration of 'B' does not match any declaration in namespace 'diag::orig'}} -template requires !0 +template requires (!0) int orig::C(); // expected-error@-1{{out-of-line declaration of 'C' does not match any declaration in namespace 'diag::orig'}} @@ -35,11 +35,11 @@ int orig::C(); namespace nodiag { struct AA { - template requires someFunc(T()) + template requires (someFunc(T())) int A(); }; -template requires someFunc(T()) +template requires (someFunc(T())) int AA::A() { return sizeof(T); } } // end namespace nodiag diff --git a/clang/test/CXX/temp/temp.constr/temp.constr.decl/var-template-decl.cpp b/clang/test/CXX/temp/temp.constr/temp.constr.decl/var-template-decl.cpp index cf6874f12d3f5..eabb636b0bbbf 100644 --- a/clang/test/CXX/temp/temp.constr/temp.constr.decl/var-template-decl.cpp +++ b/clang/test/CXX/temp/temp.constr/temp.constr.decl/var-template-decl.cpp @@ -3,11 +3,11 @@ namespace nodiag { struct B { - template requires bool(T()) + template requires (bool(T())) static int A; }; -template requires bool(U()) +template requires (bool(U())) int B::A = int(U()); } // end namespace nodiag @@ -15,11 +15,11 @@ int B::A = int(U()); namespace diag { struct B { - template requires bool(T()) // expected-note{{previous template declaration is here}} + template requires (bool(T())) // expected-note{{previous template declaration is here}} static int A; }; -template requires !bool(U()) // expected-error{{requires clause differs in template redeclaration}} +template requires (!bool(U())) // expected-error{{requires clause differs in template redeclaration}} int B::A = int(U()); } // end namespace diag \ No newline at end of file diff --git a/clang/test/CXX/temp/temp.constr/temp.constr.order/class-template-partial-specializations.cpp b/clang/test/CXX/temp/temp.constr/temp.constr.order/class-template-partial-specializations.cpp index 8c2f552694173..5d41035aa88d7 100644 --- a/clang/test/CXX/temp/temp.constr/temp.constr.order/class-template-partial-specializations.cpp +++ b/clang/test/CXX/temp/temp.constr/temp.constr.order/class-template-partial-specializations.cpp @@ -1,9 +1,12 @@ // RUN: %clang_cc1 -std=c++2a -fconcepts-ts -x c++ -verify %s -template requires sizeof(T) >= 4 +template requires (sizeof(T) >= 4) +// expected-note@-1{{similar constraint expressions not considered equivalent}} class A{}; // expected-note{{template is declared here}} -template requires sizeof(T) >= 4 && sizeof(T) <= 10 +template requires (sizeof(T) >= 4 && sizeof(T) <= 10) +// expected-note@-1{{similar constraint expression here}} + class A{}; // expected-error{{class template partial specialization is not more specialized than the primary template}} template @@ -12,7 +15,7 @@ concept C1 = sizeof(T) >= 4; template requires C1 class B{}; -template requires C1 && sizeof(T) <= 10 +template requires (C1 && sizeof(T) <= 10) class B{}; template @@ -48,3 +51,15 @@ struct F{ enum{ value = 3 }; }; static_assert(F::value == 2); static_assert(F::value == 3); static_assert(F::value == 1); + +// Make sure atomic constraints subsume each other only if their parameter +// mappings are identical.
+ +template requires C2 +struct I { }; // expected-note {{template is declared here}} + +template requires C2 +struct I { }; // expected-error {{class template partial specialization is not more specialized than the primary template}} + +template requires C2 && C2 +struct I { }; diff --git a/clang/test/CXX/temp/temp.constr/temp.constr.order/function-templates.cpp b/clang/test/CXX/temp/temp.constr/temp.constr.order/function-templates.cpp index cc578fe0ad62c..7f68369d52842 100644 --- a/clang/test/CXX/temp/temp.constr/temp.constr.order/function-templates.cpp +++ b/clang/test/CXX/temp/temp.constr/temp.constr.order/function-templates.cpp @@ -1,9 +1,11 @@ // RUN: %clang_cc1 -std=c++2a -fconcepts-ts -x c++ -verify %s -template requires sizeof(T) >= 4 +template requires (sizeof(T) >= 4) +// expected-note@-1{{similar constraint expressions not considered equivalent}} bool a() { return false; } // expected-note {{candidate function [with T = unsigned int]}} -template requires sizeof(T) >= 4 && sizeof(T) <= 10 +template requires (sizeof(T) >= 4 && sizeof(T) <= 10) +// expected-note@-1{{similar constraint expression here}} bool a() { return true; } // expected-note {{candidate function [with T = unsigned int]}} bool av = a(); // expected-error {{call to 'a' is ambiguous}} @@ -14,7 +16,7 @@ concept C1 = sizeof(T) >= 4; template requires C1 constexpr bool b() { return false; } -template requires C1 && sizeof(T) <= 10 +template requires (C1 && sizeof(T) <= 10) constexpr bool b() { return true; } static_assert(b()); @@ -86,4 +88,4 @@ static_assert(sizeof(g())); template struct X {}; template int h(X<0>); template int h(X); -static_assert(sizeof(h(X<0>{}))); \ No newline at end of file +static_assert(sizeof(h(X<0>{}))); diff --git a/clang/test/CXX/temp/temp.constr/temp.constr.order/var-template-partial-specializations.cpp b/clang/test/CXX/temp/temp.constr/temp.constr.order/var-template-partial-specializations.cpp index b40c77e70a194..cf88e34036dc7 100644 --- a/clang/test/CXX/temp/temp.constr/temp.constr.order/var-template-partial-specializations.cpp +++ b/clang/test/CXX/temp/temp.constr/temp.constr.order/var-template-partial-specializations.cpp @@ -1,9 +1,11 @@ // RUN: %clang_cc1 -std=c++2a -fconcepts-ts -x c++ -verify %s -template requires sizeof(T) >= 4 +template requires (sizeof(T) >= 4) +// expected-note@-1{{similar constraint expressions not considered equivalent}} bool a = false; // expected-note{{template is declared here}} -template requires sizeof(T) >= 4 && sizeof(T) <= 10 +template requires (sizeof(T) >= 4 && sizeof(T) <= 10) +// expected-note@-1{{similar constraint expression here}} bool a = true; // expected-error{{variable template partial specialization is not more specialized than the primary template}} template @@ -12,7 +14,7 @@ concept C1 = sizeof(T) >= 4; template requires C1 bool b = false; -template requires C1 && sizeof(T) <= 10 +template requires (C1 && sizeof(T) <= 10) bool b = true; template diff --git a/clang/test/CXX/temp/temp.explicit/p8.cpp b/clang/test/CXX/temp/temp.explicit/p8.cpp new file mode 100644 index 0000000000000..72d2255789960 --- /dev/null +++ b/clang/test/CXX/temp/temp.explicit/p8.cpp @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -std=c++2a -fconcepts-ts -x c++ -verify %s + +template requires (sizeof(T) + sizeof(S) < 10) +// expected-note@-1{{because 'sizeof(char [100]) + sizeof(char) < 10' (101 < 10) evaluated to false}} +void f(T t, S s) requires (sizeof(t) == 1 && sizeof(s) == 1) { }; +// expected-note@-1{{candidate template ignored: constraints not satisfied [with T = 
int, S = char]}} +// expected-note@-2{{because 'sizeof (t) == 1' (4 == 1) evaluated to false}} +// expected-note@-3{{candidate template ignored: constraints not satisfied [with T = char, S = short]}} +// expected-note@-4{{because 'sizeof (s) == 1' (2 == 1) evaluated to false}} +// expected-note@-5{{candidate template ignored: constraints not satisfied [with T = char [100], S = char]}} + +template<> +void f(int t, char s) { }; +// expected-error@-1{{no function template matches function template specialization 'f'}} + +template<> +void f(char t, short s) { }; +// expected-error@-1{{no function template matches function template specialization 'f'}} + +template<> +void f(char t[100], char s) { }; +// expected-error@-1{{no function template matches function template specialization 'f'}} \ No newline at end of file diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vld24.c b/clang/test/CodeGen/arm-mve-intrinsics/vld24.c index 984d5989217e1..a0f37fe65d3de 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vld24.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vld24.c @@ -98,3 +98,45 @@ void test_vst2q_f16(float16_t *addr, float16x8x2_t value) vst2q_f16(addr, value); #endif /* POLYMORPHIC */ } + +// CHECK-LABEL: @load_into_variable( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.mve.vld2q.v8i16.p0i16(i16* [[ADDR:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T:%.*]] undef, <8 x i16> [[TMP1]], 0, 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T]] [[TMP2]], <8 x i16> [[TMP3]], 0, 1 +// CHECK-NEXT: store <8 x i16> [[TMP1]], <8 x i16>* [[VALUES:%.*]], align 8 +// CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[VALUES]], i32 1 +// CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK-NEXT: ret void +// +void load_into_variable(const uint16_t *addr, uint16x8_t *values) +{ + uint16x8x2_t v; +#ifdef POLYMORPHIC + v = vld2q(addr); +#else /* POLYMORPHIC */ + v = vld2q_u16(addr); +#endif /* POLYMORPHIC */ + values[0] = v.val[0]; + values[1] = v.val[1]; +} + +// CHECK-LABEL: @extract_one_vector( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0i32(i32* [[ADDR:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[STRUCT_INT32X4X2_T:%.*]] undef, <4 x i32> [[TMP1]], 0, 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue [[STRUCT_INT32X4X2_T]] [[TMP2]], <4 x i32> [[TMP3]], 0, 1 +// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// +int32x4_t extract_one_vector(const int32_t *addr) +{ +#ifdef POLYMORPHIC + return vld2q(addr).val[0]; +#else /* POLYMORPHIC */ + return vld2q_s32(addr).val[0]; +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-target-features.c b/clang/test/CodeGen/arm-target-features.c index 03719f8a9e5dc..11fe4e505439f 100644 --- a/clang/test/CodeGen/arm-target-features.c +++ b/clang/test/CodeGen/arm-target-features.c @@ -1,23 +1,23 @@ // REQUIRES: arm-registered-target // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu cortex-a8 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP3 -// CHECK-VFP3: 
"target-features"="+armv7-a,+d32,+dsp,+fp64,+fpregs,+neon,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp" +// CHECK-VFP3: "target-features"="+armv7-a,+d32,+dsp,+fp64,+neon,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp" // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu cortex-a5 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4 -// CHECK-VFP4: "target-features"="+armv7-a,+d32,+dsp,+fp16,+fp64,+fpregs,+neon,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp" +// CHECK-VFP4: "target-features"="+armv7-a,+d32,+dsp,+fp16,+fp64,+neon,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp" // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu cortex-a7 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV // RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-a12 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV // RUN: %clang_cc1 -triple thumbv7s-linux-gnueabi -target-cpu swift -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV-2 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu krait -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV -// CHECK-VFP4-DIV: "target-features"="+armv7-a,+d32,+dsp,+fp16,+fp64,+fpregs,+hwdiv,+hwdiv-arm,+neon,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp" -// CHECK-VFP4-DIV-2: "target-features"="+armv7s,+d32,+dsp,+fp16,+fp64,+fpregs,+hwdiv,+hwdiv-arm,+neon,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp" +// CHECK-VFP4-DIV: "target-features"="+armv7-a,+d32,+dsp,+fp16,+fp64,+hwdiv,+hwdiv-arm,+neon,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp" +// CHECK-VFP4-DIV-2: "target-features"="+armv7s,+d32,+dsp,+fp16,+fp64,+hwdiv,+hwdiv-arm,+neon,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp" // RUN: %clang_cc1 -triple armv7-linux-gnueabihf -target-cpu cortex-a15 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV-ARM // RUN: %clang_cc1 -triple armv7-linux-gnueabihf -target-cpu cortex-a17 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV-ARM -// CHECK-VFP4-DIV-ARM: "target-features"="+armv7-a,+d32,+dsp,+fp16,+fp64,+fpregs,+hwdiv,+hwdiv-arm,+neon,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp,-thumb-mode" +// CHECK-VFP4-DIV-ARM: "target-features"="+armv7-a,+d32,+dsp,+fp16,+fp64,+hwdiv,+hwdiv-arm,+neon,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp,-thumb-mode" // RUN: %clang_cc1 -triple thumbv7s-apple-ios7.0 -target-cpu cyclone -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a32 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8 @@ -26,34 +26,34 @@ // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a72 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a73 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m3 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8 -// CHECK-BASIC-V8: 
"target-features"="+armv8-a,+crc,+crypto,+d32,+dsp,+fp-armv8,+fp-armv8d16,+fp-armv8d16sp,+fp-armv8sp,+fp16,+fp64,+fpregs,+hwdiv,+hwdiv-arm,+neon,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp" +// CHECK-BASIC-V8: "target-features"="+armv8-a,+crc,+crypto,+d32,+dsp,+fp-armv8,+fp-armv8d16,+fp-armv8d16sp,+fp-armv8sp,+fp16,+fp64,+hwdiv,+hwdiv-arm,+neon,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp" // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V82 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m5 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V82 -// CHECK-BASIC-V82: "target-features"="+armv8.2-a,+crc,+crypto,+d32,+dotprod,+dsp,+fp-armv8,+fp-armv8d16,+fp-armv8d16sp,+fp-armv8sp,+fp16,+fp64,+fpregs,+fullfp16,+hwdiv,+hwdiv-arm,+neon,+ras,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp" +// CHECK-BASIC-V82: "target-features"="+armv8.2-a,+crc,+crypto,+d32,+dotprod,+dsp,+fp-armv8,+fp-armv8d16,+fp-armv8d16sp,+fp-armv8sp,+fp16,+fp64,+fullfp16,+hwdiv,+hwdiv-arm,+neon,+ras,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp" // RUN: %clang_cc1 -triple armv8-linux-gnueabi -target-cpu cortex-a53 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8-ARM -// CHECK-BASIC-V8-ARM: "target-features"="+armv8-a,+crc,+crypto,+d32,+dsp,+fp-armv8,+fp-armv8d16,+fp-armv8d16sp,+fp-armv8sp,+fp16,+fp64,+fpregs,+hwdiv,+hwdiv-arm,+neon,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp,-thumb-mode" +// CHECK-BASIC-V8-ARM: "target-features"="+armv8-a,+crc,+crypto,+d32,+dsp,+fp-armv8,+fp-armv8d16,+fp-armv8d16sp,+fp-armv8sp,+fp16,+fp64,+hwdiv,+hwdiv-arm,+neon,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp,-thumb-mode" // RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-r5 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP3-D16-DIV -// CHECK-VFP3-D16-DIV: "target-features"="+armv7-r,+dsp,+fp64,+fpregs,+hwdiv,+hwdiv-arm,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp" +// CHECK-VFP3-D16-DIV: "target-features"="+armv7-r,+dsp,+fp64,+hwdiv,+hwdiv-arm,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp" // RUN: %clang_cc1 -triple armv7-linux-gnueabi -target-cpu cortex-r4f -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP3-D16-THUMB-DIV -// CHECK-VFP3-D16-THUMB-DIV: "target-features"="+armv7-r,+dsp,+fp64,+fpregs,+hwdiv,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,-thumb-mode" +// CHECK-VFP3-D16-THUMB-DIV: "target-features"="+armv7-r,+dsp,+fp64,+hwdiv,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,-thumb-mode" // RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-r7 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP3-D16-FP16-DIV // RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-r8 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP3-D16-FP16-DIV -// CHECK-VFP3-D16-FP16-DIV: "target-features"="+armv7-r,+dsp,+fp16,+fp64,+fpregs,+hwdiv,+hwdiv-arm,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp" +// CHECK-VFP3-D16-FP16-DIV: "target-features"="+armv7-r,+dsp,+fp16,+fp64,+hwdiv,+hwdiv-arm,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp" // RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-m4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-D16-SP-THUMB-DIV -// CHECK-VFP4-D16-SP-THUMB-DIV: 
"target-features"="+armv7e-m,+dsp,+fp16,+fpregs,+hwdiv,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp" +// CHECK-VFP4-D16-SP-THUMB-DIV: "target-features"="+armv7e-m,+dsp,+fp16,+hwdiv,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp" // RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-m7 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP5-D16-THUMB-DIV -// CHECK-VFP5-D16-THUMB-DIV: "target-features"="+armv7e-m,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fpregs,+hwdiv,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp" +// CHECK-VFP5-D16-THUMB-DIV: "target-features"="+armv7e-m,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+hwdiv,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp" // RUN: %clang_cc1 -triple armv7-linux-gnueabi -target-cpu cortex-r4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-THUMB-DIV @@ -105,6 +105,6 @@ // CHECK-ARMV8M-M23-LINUX: "target-features"="+armv8-m.base,+hwdiv,+thumb-mode" // RUN: %clang_cc1 -triple thumb-linux-gnueabi -target-cpu cortex-m33 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ARMV8M-MAIN-LINUX -// CHECK-ARMV8M-MAIN-LINUX: "target-features"="+armv8-m.main,+dsp,+fp-armv8d16sp,+fp16,+fpregs,+hwdiv,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp" +// CHECK-ARMV8M-MAIN-LINUX: "target-features"="+armv8-m.main,+dsp,+fp-armv8d16sp,+fp16,+hwdiv,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp" void foo() {} diff --git a/clang/test/CodeGen/mempcpy-libcall.c b/clang/test/CodeGen/mempcpy-libcall.c new file mode 100644 index 0000000000000..b88f494f164df --- /dev/null +++ b/clang/test/CodeGen/mempcpy-libcall.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -emit-llvm < %s| FileCheck %s + +typedef __SIZE_TYPE__ size_t; + +void *mempcpy(void *, void const *, size_t); + +char *test(char *d, char *s, size_t n) { + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}} %[[REG1:[^ ]+]], i8* {{.*}} %1, i64 %[[REG2:[^ ]+]], i1 false) + // CHECK-NEXT: %[[REGr:[^ ]+]] = getelementptr inbounds i8, i8* %[[REG1]], i64 %[[REG2]] + // CHECK-NEXT: ret i8* %[[REGr]] + return mempcpy(d, s, n); +} diff --git a/clang/test/Driver/arm-mfpu.c b/clang/test/Driver/arm-mfpu.c index b709622f72075..c3731fa5bd635 100644 --- a/clang/test/Driver/arm-mfpu.c +++ b/clang/test/Driver/arm-mfpu.c @@ -84,7 +84,7 @@ // CHECK-VFP3-D16-DAG: "-target-feature" "-vfp4d16sp" // CHECK-VFP3-D16-DAG: "-target-feature" "-fp-armv8d16sp" // CHECK-VFP3-D16-DAG: "-target-feature" "+fp64" -// CHECK-VFP3-D16-NOT: "-target-feature" "+d32" +// CHECK-VFP3-D16-DAG: "-target-feature" "-d32" // CHECK-VFP3-D16-DAG: "-target-feature" "-neon" // RUN: %clang -target arm-linux-eabi -mfpu=vfpv3-d16-fp16 %s -### -o %t.o 2>&1 \ @@ -98,7 +98,7 @@ // CHECK-VFP3-D16-FP16-DAG: "-target-feature" "-vfp4d16sp" // CHECK-VFP3-D16-FP16-DAG: "-target-feature" "-fp-armv8d16sp" // CHECK-VFP3-D16-FP16-DAG: "-target-feature" "+fp64" -// CHECK-VFP3-D16-FP16-NOT: "-target-feature" "+d32" +// CHECK-VFP3-D16-FP16-DAG: "-target-feature" "-d32" // CHECK-VFP3-D16-FP16-DAG: "-target-feature" "-neon" // CHECK-VFP3-D16-FP16-DAG: "-target-feature" "-crypto" @@ -108,8 +108,8 @@ // RUN: | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-3 %s // CHECK-VFP3XD-NOT: "-target-feature" "+soft-float" // CHECK-VFP3XD-DAG: "-target-feature" "+soft-float-abi" -// CHECK-VFP3XD-NOT: "-target-feature" "+fp64" -// CHECK-VFP3XD-NOT: "-target-feature" "+d32" +// CHECK-VFP3XD-DAG: "-target-feature" "-fp64" +// CHECK-VFP3XD-DAG: "-target-feature" "-d32" // CHECK-VFP3XD-DAG: "-target-feature" "+vfp3d16sp" // CHECK-VFP3XD-DAG: "-target-feature" 
"-fp16" // CHECK-VFP3XD-DAG: "-target-feature" "-vfp4d16sp" @@ -127,8 +127,8 @@ // CHECK-VFP3XD-FP16-DAG: "-target-feature" "+fp16" // CHECK-VFP3XD-FP16-DAG: "-target-feature" "-vfp4d16sp" // CHECK-VFP3XD-FP16-DAG: "-target-feature" "-fp-armv8d16sp" -// CHECK-VFP3XD-FP16-NOT: "-target-feature" "+fp64" -// CHECK-VFP3XD-FP16-NOT: "-target-feature" "+d32" +// CHECK-VFP3XD-FP16-DAG: "-target-feature" "-fp64" +// CHECK-VFP3XD-FP16-DAG: "-target-feature" "-d32" // CHECK-VFP3XD-FP16-DAG: "-target-feature" "-neon" // CHECK-VFP3XD-FP16-DAG: "-target-feature" "-crypto" @@ -162,7 +162,7 @@ // CHECK-VFP4-D16-DAG: "-target-feature" "+vfp4d16" // CHECK-VFP4-D16-DAG: "-target-feature" "-fp-armv8d16sp" // CHECK-VFP4-D16-DAG: "-target-feature" "+fp64" -// CHECK-VFP4-D16-NOT: "-target-feature" "+d32" +// CHECK-VFP4-D16-DAG: "-target-feature" "-d32" // CHECK-VFP4-D16-DAG: "-target-feature" "-neon" // RUN: %clang -target arm-linux-eabi -mfpu=fp4-sp-d16 %s -### -o %t.o 2>&1 \ @@ -175,8 +175,8 @@ // CHECK-FP4-SP-D16-DAG: "-target-feature" "+soft-float-abi" // CHECK-FP4-SP-D16-DAG: "-target-feature" "+vfp4d16sp" // CHECK-FP4-SP-D16-DAG: "-target-feature" "-fp-armv8d16sp" -// CHECK-FP4-SP-D16-NOT: "-target-feature" "+fp64" -// CHECK-FP4-SP-D16-NOT: "-target-feature" "+d32" +// CHECK-FP4-SP-D16-DAG: "-target-feature" "-fp64" +// CHECK-FP4-SP-D16-DAG: "-target-feature" "-d32" // CHECK-FP4-SP-D16-DAG: "-target-feature" "-neon" // RUN: %clang -target arm-linux-eabi -mfpu=fp5-sp-d16 %s -### -o %t.o 2>&1 \ @@ -189,8 +189,8 @@ // CHECK-FP5-SP-D16-DAG: "-target-feature" "+soft-float-abi" // CHECK-FP5-SP-D16-DAG: "-target-feature" "+fp-armv8d16sp" // CHECK-FP5-SP-D16-DAG: "-target-feature" "-neon" -// CHECK-FP5-SP-D16-NOT: "-target-feature" "+fp64" -// CHECK-FP5-SP-D16-NOT: "-target-feature" "+d32" +// CHECK-FP5-SP-D16-DAG: "-target-feature" "-fp64" +// CHECK-FP5-SP-D16-DAG: "-target-feature" "-d32" // CHECK-FP5-SP-D16-DAG: "-target-feature" "-crypto" // RUN: %clang -target arm-linux-eabi -mfpu=fp5-dp-d16 %s -### -o %t.o 2>&1 \ @@ -203,7 +203,7 @@ // CHECK-FP5-DP-D16-DAG: "-target-feature" "+soft-float-abi" // CHECK-FP5-DP-D16-DAG: "-target-feature" "+fp-armv8d16" // CHECK-FP5-DP-D16-DAG: "-target-feature" "+fp64" -// CHECK-FP5-DP-D16-NOT: "-target-feature" "+d32" +// CHECK-FP5-DP-D16-DAG: "-target-feature" "-d32" // CHECK-FP5-DP-D16-DAG: "-target-feature" "-neon" // CHECK-FP5-DP-D16-DAG: "-target-feature" "-crypto" // CHECK-SOFT-ABI-FP-5-DAG: "-target-feature" "+soft-float" @@ -323,8 +323,8 @@ // CHECK-NO-FP-DAG: "-target-feature" "-vfp3d16sp" // CHECK-NO-FP-DAG: "-target-feature" "-vfp4d16sp" // CHECK-NO-FP-DAG: "-target-feature" "-fp-armv8d16sp" -// CHECK-NO-FP-NOT: "-target-feature" "+fp64" -// CHECK-NO-FP-NOT: "-target-feature" "+d32" +// CHECK-NO-FP-DAG: "-target-feature" "-fp64" +// CHECK-NO-FP-DAG: "-target-feature" "-d32" // CHECK-NO-FP-DAG: "-target-feature" "-neon" // CHECK-NO-FP-DAG: "-target-feature" "-crypto" @@ -382,8 +382,8 @@ // CHECK-ARM7-ANDROID-FP-DEFAULT-NOT: "-target-feature" "+soft-float" // CHECK-ARM7-ANDROID-FP-DEFAULT-DAG: "-target-feature" "+soft-float-abi" // CHECK-ARM7-ANDROID-FP-DEFAULT-DAG: "-target-feature" "+vfp3" -// CHECK-ARM7-ANDROID-FP-DEFAULT-NOT: "-target-feature" "+vfp4" -// CHECK-ARM7-ANDROID-FP-DEFAULT-NOT: "-target-feature" "+fp-armv8" +// CHECK-ARM7-ANDROID-FP-DEFAULT-DAG: "-target-feature" "-vfp4" +// CHECK-ARM7-ANDROID-FP-DEFAULT-DAG: "-target-feature" "-fp-armv8" // CHECK-ARM7-ANDROID-FP-DEFAULT-DAG: "-target-feature" "+neon" // CHECK-ARM7-ANDROID-FP-DEFAULT-NOT: 
"-target-feature" "+crypto" @@ -391,7 +391,7 @@ // RUN: | FileCheck --check-prefix=CHECK-ARM7-ANDROID-FP-D16 %s // CHECK-ARM7-ANDROID-FP-D16-NOT: "-target-feature" "+soft-float" // CHECK-ARM7-ANDROID-FP-D16-DAG: "-target-feature" "+soft-float-abi" -// CHECK-ARM7-ANDROID-FP-D16-NOT: "-target-feature" "+d32" +// CHECK-ARM7-ANDROID-FP-D16-DAG: "-target-feature" "-d32" // CHECK-ARM7-ANDROID-FP-D16-DAG: "-target-feature" "+vfp3d16" // CHECK-ARM7-ANDROID-FP-D16-NOT: "-target-feature" "+vfp4" // CHECK-ARM7-ANDROID-FP-D16-NOT: "-target-feature" "+fp-armv8" @@ -403,3 +403,23 @@ // CHECK-SOFTFLOATABI-INHIBITS-MVE-NOT: "-target-feature" "+mve" // CHECK-SOFTFLOATABI-INHIBITS-MVE-DAG: "-target-feature" "-mve" // CHECK-SOFTFLOATABI-INHIBITS-MVE-DAG: "-target-feature" "-mve.fp" + +// RUN: %clang -target arm-none-none-eabi %s -march=armv8.1-m.main+mve.fp -mfpu=none -### -c 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-MVEFP-FPUNONE %s +// CHECK-MVEFP-FPUNONE-DAG: "-target-feature" "-vfp2sp" +// CHECK-MVEFP-FPUNONE-DAG: "-target-feature" "-vfp3d16sp" +// CHECK-MVEFP-FPUNONE-DAG: "-target-feature" "-vfp4d16sp" +// CHECK-MVEFP-FPUNONE-DAG: "-target-feature" "-fp-armv8d16sp" +// CHECK-MVEFP-FPUNONE-DAG: "-target-feature" "-fp64" +// CHECK-MVEFP-FPUNONE-DAG: "-target-feature" "-d32" +// CHECK-MVEFP-FPUNONE-DAG: "-target-feature" "-neon" +// CHECK-MVEFP-FPUNONE-DAG: "-target-feature" "-crypto" +// CHECK-MVEFP-FPUNONE-DAG: "-target-feature" "-mve.fp" +// CHECK-MVEFP-FPUNONE-NOT: "-target-feature" "-fpregs" + + +// RUN: %clang -target arm-none-none-eabi %s -march=armv8.1-m.main+mve -mfpu=none -### -c 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-MVEI-FPUNONE %s +// CHECK-MVEI-FPUNONE-DAG: "-target-feature" "-mve.fp" +// CHECK-MVEI-FPUNONE-DAG: "-target-feature" "+mve" +// CHECK-MVEI-FPUNONE-NOT: "-target-feature" "-fpregs" diff --git a/clang/test/Parser/cxx-concepts-ambig-constraint-expr.cpp b/clang/test/Parser/cxx-concepts-ambig-constraint-expr.cpp index 12ab338a6b00a..1cd2605ce0556 100644 --- a/clang/test/Parser/cxx-concepts-ambig-constraint-expr.cpp +++ b/clang/test/Parser/cxx-concepts-ambig-constraint-expr.cpp @@ -5,25 +5,5 @@ // the syntax is consumed without backtracking. 
// type-specifier-seq in conversion-type-id -template requires (bool)&T::operator short -unsigned int foo(); // expected-error {{C++ requires a type specifier for all declarations}} - -// type-specifier-seq in new-type-id -template requires (bool)sizeof new (T::f()) short -unsigned int bar(); // expected-error {{C++ requires a type specifier for all declarations}} - -template requires (bool)sizeof new (T::f()) unsigned // expected-error {{'struct' cannot be signed or unsigned}} -struct X { }; // expected-error {{'X' cannot be defined in a type specifier}} - -// C-style cast -// of function call on function-style cast -template requires (bool(T())) -T (*fp)(); // expected-error {{use of undeclared identifier 'fp'}} - -// function-style cast -// as the callee in a function call -struct A { - static int t; - template requires bool(T()) - (A(T (&t))) { } // expected-error {{called object type 'bool' is not a function or function pointer}} -}; +template requires T::operator short +unsigned int foo(); // expected-error {{C++ requires a type specifier for all declarations}} \ No newline at end of file diff --git a/clang/test/Parser/cxx-concepts-requires-clause.cpp b/clang/test/Parser/cxx-concepts-requires-clause.cpp index 01893a94cbc94..60e7004e08187 100644 --- a/clang/test/Parser/cxx-concepts-requires-clause.cpp +++ b/clang/test/Parser/cxx-concepts-requires-clause.cpp @@ -1,13 +1,11 @@ -// RUN: %clang_cc1 -std=c++14 -fconcepts-ts -x c++ %s -verify -// expected-no-diagnostics +// RUN: %clang_cc1 -std=c++2a -fconcepts-ts -x c++ %s -verify // Test parsing of the optional requires-clause in a template-declaration. template requires true void foo() { } - -template requires !0 +template requires (!0) struct A { void foo(); struct AA; @@ -27,31 +25,30 @@ struct A { using MQ = M; }; -template requires !0 +template requires (!0) void A::foo() { } -template requires !0 +template requires (!0) struct A::AA { }; -template requires !0 +template requires (!0) enum A::E : int { E0 }; -template requires !0 +template requires (!0) int A::x = 0; -template requires !0 +template requires (!0) template requires true void A::Mfoo() { } -template requires !0 +template requires (!0) template requires true struct A::M { }; -template requires !0 +template requires (!0) template requires true int A::Mx = 0; - template requires true int x = 0; @@ -80,3 +77,81 @@ struct C::M { }; template requires true int C::Mx = 0; + +// Test behavior with non-primary-expression requires clauses + +template requires foo() +// expected-error@-1{{parentheses are required around this expression in a requires clause}} +struct B1 { }; + +int func() { } + +template requires func() +// expected-error@-1{{atomic constraint must be of type 'bool' (found '')}} +// expected-note@-2{{parentheses are required around this expression in a requires clause}} +struct B2 { }; + +template requires (foo()) +struct B3 { }; + +template requires T{} +// expected-error@-1{{parentheses are required around this expression in a requires clause}} +struct B4 { }; + +template requires sizeof(T) == 0 +// expected-error@-1{{parentheses are required around this expression in a requires clause}} +struct B5 { }; + +template requires (sizeof(T)) == 0 +// expected-error@-1{{parentheses are required around this expression in a requires clause}} +struct B6 { }; + +template requires 0 +// expected-error@-1{{atomic constraint must be of type 'bool' (found 'int')}} +(int) bar() { }; + +template requires foo +(int) bar() { }; +// expected-error@-1{{expected '(' for function-style 
cast or type construction}} + +template +void bar() requires foo(); +// expected-error@-1{{parentheses are required around this expression in a requires clause}} + +template +void bar() requires (foo()); + +template +void bar() requires func(); +// expected-error@-1{{atomic constraint must be of type 'bool' (found '')}} +// expected-note@-2{{parentheses are required around this expression in a requires clause}} + +template +void bar() requires T{}; +// expected-error@-1{{parentheses are required around this expression in a requires clause}} + +template +void bar() requires sizeof(T) == 0; +// expected-error@-1{{parentheses are required around this expression in a requires clause}} + +template +void bar() requires (sizeof(T)) == 0; +// expected-error@-1{{parentheses are required around this expression in a requires clause}} + +void bar(int x, int y) requires (x, y, true); + +struct B { + int x; + void foo(int y) requires (x, this, this->x, y, true); + static void bar(int y) requires (x, true); + // expected-error@-1{{'this' cannot be implicitly used in a static member function declaration}} + static void baz(int y) requires (this, true); + // expected-error@-1{{'this' cannot be used in a static member function declaration}} +}; + +auto lambda1 = [] (auto x) requires (sizeof(decltype(x)) == 1) { }; + +auto lambda2 = [] (auto x) constexpr -> int requires (sizeof(decltype(x)) == 1) { return 0; }; + +auto lambda3 = [] requires (sizeof(char) == 1) { }; +// expected-error@-1{{lambda requires '()' before 'requires' clause}} \ No newline at end of file diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c index 4d8c6e5c46b63..a03725889360e 100644 --- a/clang/test/Preprocessor/init.c +++ b/clang/test/Preprocessor/init.c @@ -6551,10 +6551,11 @@ // PPC32-LINUX-NOT: _CALL_LINUX // // RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc-unknown-linux-gnu -target-feature +spe < /dev/null | FileCheck -match-full-lines -check-prefix PPC32-SPE %s +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpcspe-unknown-linux-gnu < /dev/null | FileCheck -match-full-lines -check-prefix PPC32-SPE %s // // PPC32-SPE:#define __NO_FPRS__ 1 // PPC32-SPE:#define __SPE__ 1 -// +// // RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc-unknown-linux-gnu -target-cpu 8548 < /dev/null | FileCheck -match-full-lines -check-prefix PPC8548 %s // // PPC8548:#define __NO_FPRS__ 1 diff --git a/clang/test/Sema/arm-mve-immediates.c b/clang/test/Sema/arm-mve-immediates.c index 54cdb96efcd3b..b8106fbb70282 100644 --- a/clang/test/Sema/arm-mve-immediates.c +++ b/clang/test/Sema/arm-mve-immediates.c @@ -110,3 +110,96 @@ void test_lane_indices(uint8x16_t v16, uint16x8_t v8, vsetq_lane_u64(23, v2, 1); vsetq_lane_u64(23, v2, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} } + +void test_immediate_shifts(uint8x16_t vb, uint16x8_t vh, uint32x4_t vw) +{ + vshlq_n(vb, 0); + vshlq_n(vb, 7); + vshlq_n(vh, 0); + vshlq_n(vh, 15); + vshlq_n(vw, 0); + vshlq_n(vw, 31); + + vshlq_n(vb, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}} + vshlq_n(vb, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + vshlq_n(vh, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}} + vshlq_n(vh, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + vshlq_n(vw, -1); // expected-error {{argument value -1 is outside the valid range [0, 31]}} + vshlq_n(vw, 32); // expected-error {{argument value 32 is outside 
the valid range [0, 31]}} + + vqshlq_n(vb, 0); + vqshlq_n(vb, 7); + vqshlq_n(vh, 0); + vqshlq_n(vh, 15); + vqshlq_n(vw, 0); + vqshlq_n(vw, 31); + + vqshlq_n(vb, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}} + vqshlq_n(vb, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + vqshlq_n(vh, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}} + vqshlq_n(vh, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + vqshlq_n(vw, -1); // expected-error {{argument value -1 is outside the valid range [0, 31]}} + vqshlq_n(vw, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + + vsliq(vb, vb, 0); + vsliq(vb, vb, 7); + vsliq(vh, vh, 0); + vsliq(vh, vh, 15); + vsliq(vw, vw, 0); + vsliq(vw, vw, 31); + + vsliq(vb, vb, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}} + vsliq(vb, vb, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + vsliq(vh, vh, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}} + vsliq(vh, vh, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + vsliq(vw, vw, -1); // expected-error {{argument value -1 is outside the valid range [0, 31]}} + vsliq(vw, vw, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + + vshllbq(vb, 1); + vshllbq(vb, 8); + vshllbq(vh, 1); + vshllbq(vh, 16); + + vshllbq(vb, 0); // expected-error {{argument value 0 is outside the valid range [1, 8]}} + vshllbq(vb, 9); // expected-error {{argument value 9 is outside the valid range [1, 8]}} + vshllbq(vh, 0); // expected-error {{argument value 0 is outside the valid range [1, 16]}} + vshllbq(vh, 17); // expected-error {{argument value 17 is outside the valid range [1, 16]}} + + vshrq(vb, 1); + vshrq(vb, 8); + vshrq(vh, 1); + vshrq(vh, 16); + vshrq(vw, 1); + vshrq(vw, 32); + + vshrq(vb, 0); // expected-error {{argument value 0 is outside the valid range [1, 8]}} + vshrq(vb, 9); // expected-error {{argument value 9 is outside the valid range [1, 8]}} + vshrq(vh, 0); // expected-error {{argument value 0 is outside the valid range [1, 16]}} + vshrq(vh, 17); // expected-error {{argument value 17 is outside the valid range [1, 16]}} + vshrq(vw, 0); // expected-error {{argument value 0 is outside the valid range [1, 32]}} + vshrq(vw, 33); // expected-error {{argument value 33 is outside the valid range [1, 32]}} + + vshrntq(vb, vh, 1); + vshrntq(vb, vh, 8); + vshrntq(vh, vw, 1); + vshrntq(vh, vw, 16); + + vshrntq(vb, vh, 0); // expected-error {{argument value 0 is outside the valid range [1, 8]}} + vshrntq(vb, vh, 9); // expected-error {{argument value 9 is outside the valid range [1, 8]}} + vshrntq(vh, vw, 0); // expected-error {{argument value 0 is outside the valid range [1, 16]}} + vshrntq(vh, vw, 17); // expected-error {{argument value 17 is outside the valid range [1, 16]}} + + vsriq(vb, vb, 1); + vsriq(vb, vb, 8); + vsriq(vh, vh, 1); + vsriq(vh, vh, 16); + vsriq(vw, vw, 1); + vsriq(vw, vw, 32); + + vsriq(vb, vb, 0); // expected-error {{argument value 0 is outside the valid range [1, 8]}} + vsriq(vb, vb, 9); // expected-error {{argument value 9 is outside the valid range [1, 8]}} + vsriq(vh, vh, 0); // expected-error {{argument value 0 is outside the valid range [1, 16]}} + vsriq(vh, vh, 17); // expected-error {{argument value 17 is outside the valid range [1, 16]}} + vsriq(vw, vw, 0); // expected-error {{argument value 0 is outside the 
valid range [1, 32]}}
+  vsriq(vw, vw, 33); // expected-error {{argument value 33 is outside the valid range [1, 32]}}
+}
diff --git a/clang/test/SemaTemplate/instantiate-requires-clause.cpp b/clang/test/SemaTemplate/instantiate-requires-clause.cpp
new file mode 100644
index 0000000000000..f36396b98db73
--- /dev/null
+++ b/clang/test/SemaTemplate/instantiate-requires-clause.cpp
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -std=c++2a -fconcepts-ts -x c++ %s -verify
+
+template <typename... Args> requires ((sizeof(Args) == 1), ...)
+// expected-note@-1 {{because '(sizeof(int) == 1) , (sizeof(char) == 1) , (sizeof(int) == 1)' evaluated to false}}
+void f1(Args&&... args) { }
+// expected-note@-1 {{candidate template ignored: constraints not satisfied [with Args = <int, char, int>]}}
+
+using f11 = decltype(f1('a'));
+using f12 = decltype(f1(1, 'b'));
+using f13 = decltype(f1(1, 'b', 2));
+// expected-error@-1 {{no matching function for call to 'f1'}}
+
+template <typename... Args>
+void f2(Args&&... args) requires ((sizeof(args) == 1), ...) { }
+// expected-note@-1 {{candidate template ignored: constraints not satisfied [with Args = <int, char, int>]}}
+// expected-note@-2 {{because '(sizeof (args) == 1) , (sizeof (args) == 1) , (sizeof (args) == 1)' evaluated to false}}
+
+using f21 = decltype(f2('a'));
+using f22 = decltype(f2(1, 'b'));
+using f23 = decltype(f2(1, 'b', 2));
+// expected-error@-1 {{no matching function for call to 'f2'}}
+
+template <typename... Args> requires ((sizeof(Args) == 1), ...)
+// expected-note@-1 {{because '(sizeof(int) == 1) , (sizeof(char) == 1) , (sizeof(int) == 1)' evaluated to false}}
+void f3(Args&&... args) requires ((sizeof(args) == 1), ...) { }
+// expected-note@-1 {{candidate template ignored: constraints not satisfied [with Args = <int, char, int>]}}
+
+using f31 = decltype(f3('a'));
+using f32 = decltype(f3(1, 'b'));
+using f33 = decltype(f3(1, 'b', 2));
+// expected-error@-1 {{no matching function for call to 'f3'}}
diff --git a/compiler-rt/test/cfi/cross-dso/stats.cpp b/compiler-rt/test/cfi/cross-dso/stats.cpp
index 09a7217bf066a..9d8c2ee3e0d95 100644
--- a/compiler-rt/test/cfi/cross-dso/stats.cpp
+++ b/compiler-rt/test/cfi/cross-dso/stats.cpp
@@ -22,24 +22,24 @@ extern "C" void nvcall(A *a);
 #ifdef SHARED_LIB
 extern "C" __attribute__((noinline)) void vcall(A *a) {
-  // CHECK: stats.cpp:[[@LINE+1]] vcall.cfi cfi-vcall 37
+  // CHECK-DAG: stats.cpp:[[@LINE+1]] vcall.cfi cfi-vcall 37
   a->vf();
 }
 extern "C" __attribute__((noinline)) void nvcall(A *a) {
-  // CHECK: stats.cpp:[[@LINE+1]] nvcall.cfi cfi-nvcall 51
+  // CHECK-DAG: stats.cpp:[[@LINE+1]] nvcall.cfi cfi-nvcall 51
   a->nvf();
 }
 #else
 extern "C" __attribute__((noinline)) A *dcast(A *a) {
-  // CHECK: stats.cpp:[[@LINE+1]] dcast.cfi cfi-derived-cast 24
+  // CHECK-DAG: stats.cpp:[[@LINE+1]] dcast.cfi cfi-derived-cast 24
   return (A *)(ABase *)a;
 }
 extern "C" __attribute__((noinline)) A *ucast(A *a) {
-  // CHECK: stats.cpp:[[@LINE+1]] ucast.cfi cfi-unrelated-cast 81
+  // CHECK-DAG: stats.cpp:[[@LINE+1]] ucast.cfi cfi-unrelated-cast 81
   return (A *)(char *)a;
 }
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/weak_result.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/weak_result.pass.cpp
index a3577ff064275..848858d33b19f 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/weak_result.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/weak_result.pass.cpp
@@ -12,6 +12,8 @@

 // has weak result type

+// REQUIRES: c++98 || c++03 || c++11 || c++14 || c++17
+
 #include <functional>
 #include <type_traits>

diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt
index 
0ddcd5f971f7f..f3698e9c46e1b 100644 --- a/libcxxabi/CMakeLists.txt +++ b/libcxxabi/CMakeLists.txt @@ -92,6 +92,14 @@ usual symlinks pointing to that.") option(LIBCXXABI_ENABLE_SHARED "Build libc++abi as a shared library." ON) option(LIBCXXABI_ENABLE_STATIC "Build libc++abi as a static library." ON) +option(LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXXABI + "Whether the libc++abi tests should link with the shared libc++abi library" + ${LIBCXXABI_ENABLE_SHARED}) + +option(LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXX + "Whether the libc++abi tests should link with the shared libc++ library" + ${LIBCXX_ENABLE_SHARED}) + cmake_dependent_option(LIBCXXABI_INSTALL_STATIC_LIBRARY "Install the static libc++abi library." ON "LIBCXXABI_ENABLE_STATIC;LIBCXXABI_INSTALL_LIBRARY" OFF) @@ -115,6 +123,26 @@ if (NOT LIBCXXABI_ENABLE_SHARED AND NOT LIBCXXABI_ENABLE_STATIC) message(FATAL_ERROR "libc++abi must be built as either a shared or static library.") endif() +if(LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXXABI AND NOT LIBCXXABI_ENABLE_SHARED) + message(FATAL_ERROR "LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXXABI being ON requires LIBCXXABI_ENABLE_SHARED to be ON") +endif() + +if(NOT LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXXABI AND NOT LIBCXXABI_ENABLE_STATIC) + message(FATAL_ERROR "LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXXABI being OFF requires LIBCXXABI_ENABLE_STATIC to be ON") +endif() + +if(DEFINED LIBCXX_ENABLE_SHARED + AND LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXX + AND NOT LIBCXX_ENABLE_SHARED) + message(FATAL_ERROR "LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXX being ON requires LIBCXX_ENABLE_SHARED to be ON") +endif() + +if(DEFINED LIBCXX_ENABLE_STATIC + AND NOT LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXX + AND NOT LIBCXX_ENABLE_STATIC) + message(FATAL_ERROR "LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXX being OFF requires LIBCXX_ENABLE_STATIC to be ON") +endif() + if (LLVM_EXTERNAL_LIBCXX_SOURCE_DIR) set(LIBCXXABI_LIBCXX_SRC_DIRS ${LLVM_EXTERNAL_LIBCXX_SOURCE_DIR}) else() @@ -209,7 +237,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LIBCXXABI_LIBRARY_DIR}) # directory. if (NOT LIBCXXABI_LIBCXX_LIBRARY_PATH) set(LIBCXXABI_LIBCXX_LIBRARY_PATH "${LIBCXXABI_LIBRARY_DIR}" CACHE PATH - "The path to libc++ library.") + "The path to libc++ library." FORCE) endif() # Check that we can build with 32 bits if requested. 
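
The four guards above encode one rule: a test-linkage option may only request a library flavor that is actually being built, for libc++abi and libc++ alike, and the defaults preserve the old behavior of linking the shared flavor whenever it exists. A minimal C++ sketch of that validity matrix; the struct, its field names, and the valid() helper are illustrative stand-ins for the CMake cache variables, not part of the build system:

// Hypothetical model of the consistency checks in libcxxabi/CMakeLists.txt.
struct BuildConfig {
  bool abiShared, abiStatic;   // LIBCXXABI_ENABLE_SHARED / _STATIC
  bool cxxShared, cxxStatic;   // LIBCXX_ENABLE_SHARED / _STATIC
  bool testsUseSharedAbi;      // LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXXABI
  bool testsUseSharedCxx;      // LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXX
};

bool valid(const BuildConfig &c) {
  if (c.testsUseSharedAbi && !c.abiShared) return false;   // shared libc++abi requested but not built
  if (!c.testsUseSharedAbi && !c.abiStatic) return false;  // static libc++abi requested but not built
  if (c.testsUseSharedCxx && !c.cxxShared) return false;   // shared libc++ requested but not built
  if (!c.testsUseSharedCxx && !c.cxxStatic) return false;  // static libc++ requested but not built
  return true;
}
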
diff --git a/libcxxabi/test/CMakeLists.txt b/libcxxabi/test/CMakeLists.txt index 60e052d20710c..23c0dac5b8568 100644 --- a/libcxxabi/test/CMakeLists.txt +++ b/libcxxabi/test/CMakeLists.txt @@ -20,6 +20,8 @@ pythonize_bool(LIBCXXABI_USE_LLVM_UNWINDER) pythonize_bool(LIBCXXABI_USE_COMPILER_RT) pythonize_bool(LIBCXXABI_BUILD_EXTERNAL_THREAD_LIBRARY) pythonize_bool(LIBCXX_ENABLE_PARALLEL_ALGORITHMS) +pythonize_bool(LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXX) +pythonize_bool(LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXXABI) set(LIBCXXABI_TARGET_INFO "libcxx.test.target_info.LocalTI" CACHE STRING "TargetInfo to use when setting up test environment.") set(LIBCXXABI_EXECUTOR "None" CACHE STRING diff --git a/libcxxabi/test/lit.site.cfg.in b/libcxxabi/test/lit.site.cfg.in index 8ac2fa45cab0b..8125e89a4533c 100644 --- a/libcxxabi/test/lit.site.cfg.in +++ b/libcxxabi/test/lit.site.cfg.in @@ -16,8 +16,8 @@ config.sanitizer_library = "@LIBCXXABI_SANITIZER_LIBRARY@" config.enable_32bit = @LIBCXXABI_BUILD_32_BITS@ config.target_info = "@LIBCXXABI_TARGET_INFO@" config.executor = "@LIBCXXABI_EXECUTOR@" -config.libcxxabi_shared = @LIBCXXABI_ENABLE_SHARED@ -config.enable_shared = @LIBCXX_ENABLE_SHARED@ +config.libcxxabi_shared = @LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXXABI@ +config.enable_shared = @LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXX@ config.enable_exceptions = @LIBCXXABI_ENABLE_EXCEPTIONS@ config.host_triple = "@LLVM_HOST_TRIPLE@" config.target_triple = "@TARGET_TRIPLE@" diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp index d29932dd42a2e..106bc9bab5bd2 100644 --- a/lld/ELF/Arch/Hexagon.cpp +++ b/lld/ELF/Arch/Hexagon.cpp @@ -54,6 +54,7 @@ Hexagon::Hexagon() { // Hexagon Linux uses 64K pages by default. defaultMaxPageSize = 0x10000; noneRel = R_HEX_NONE; + tlsGotRel = R_HEX_TPREL_32; } uint32_t Hexagon::calcEFlags() const { @@ -115,6 +116,11 @@ RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s, case R_HEX_B22_PCREL_X: case R_HEX_B32_PCREL_X: return R_PLT_PC; + case R_HEX_IE_32_6_X: + case R_HEX_IE_16_X: + case R_HEX_IE_HI16: + case R_HEX_IE_LO16: + return R_GOT; case R_HEX_GOTREL_11_X: case R_HEX_GOTREL_16_X: case R_HEX_GOTREL_32_6_X: @@ -125,6 +131,13 @@ RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s, case R_HEX_GOT_16_X: case R_HEX_GOT_32_6_X: return R_GOTPLT; + case R_HEX_IE_GOT_11_X: + case R_HEX_IE_GOT_16_X: + case R_HEX_IE_GOT_32_6_X: + case R_HEX_IE_GOT_HI16: + case R_HEX_IE_GOT_LO16: + config->hasStaticTlsModel = true; + return R_GOTPLT; case R_HEX_TPREL_11_X: case R_HEX_TPREL_16: case R_HEX_TPREL_16_X: @@ -227,6 +240,7 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { or32le(loc, applyMask(0x00203fe0, val & 0x3f)); break; case R_HEX_11_X: + case R_HEX_IE_GOT_11_X: case R_HEX_GOT_11_X: case R_HEX_GOTREL_11_X: case R_HEX_TPREL_11_X: @@ -236,6 +250,8 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { or32le(loc, applyMask(0x000007e0, val)); break; case R_HEX_16_X: // These relocs only have 6 effective bits. 
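// The IE and IE_GOT cases added below reuse the same mask-driven encoding
// paths as the GOT/GOTREL/TPREL relocations around them. A sketch of the
// scatter semantics assumed here (illustrative; the real applyMask helper
// is defined earlier in Hexagon.cpp and may differ in detail):
//
//   static uint32_t applyMask(uint32_t mask, uint32_t data) {
//     uint32_t result = 0;
//     size_t off = 0;                 // next unconsumed low bit of data
//     for (size_t bit = 0; bit != 32; ++bit)
//       if ((mask >> bit) & 1)        // each set mask bit receives
//         result |= ((data >> off++) & 1) << bit;   // the next data bit
//     return result;
//   }
//
// e.g. applyMask(0x00203fe0, val & 0x3f) routes the six low bits of val
// into instruction bits 5..10, matching the R_HEX_11_X group above.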
+ case R_HEX_IE_16_X: + case R_HEX_IE_GOT_16_X: case R_HEX_GOT_16_X: case R_HEX_GOTREL_16_X: case R_HEX_TPREL_16_X: @@ -251,6 +267,8 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_HEX_32_6_X: case R_HEX_GOT_32_6_X: case R_HEX_GOTREL_32_6_X: + case R_HEX_IE_GOT_32_6_X: + case R_HEX_IE_32_6_X: case R_HEX_TPREL_32_6_X: or32le(loc, applyMask(0x0fff3fff, val >> 6)); break; @@ -285,11 +303,15 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_HEX_GOTREL_HI16: case R_HEX_HI16: + case R_HEX_IE_GOT_HI16: + case R_HEX_IE_HI16: case R_HEX_TPREL_HI16: or32le(loc, applyMask(0x00c03fff, val >> 16)); break; case R_HEX_GOTREL_LO16: case R_HEX_LO16: + case R_HEX_IE_GOT_LO16: + case R_HEX_IE_LO16: case R_HEX_TPREL_LO16: or32le(loc, applyMask(0x00c03fff, val)); break; diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 8025779d18dc1..1df8a157cfe5e 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -177,7 +177,9 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, return 1; } - bool canRelax = config->emachine != EM_ARM && config->emachine != EM_RISCV; + bool canRelax = config->emachine != EM_ARM && + config->emachine != EM_HEXAGON && + config->emachine != EM_RISCV; // If we are producing an executable and the symbol is non-preemptable, it // must be defined and the code sequence can be relaxed to use Local-Exec. diff --git a/lld/test/ELF/hexagon-tls-ie.s b/lld/test/ELF/hexagon-tls-ie.s new file mode 100644 index 0000000000000..ea05279473116 --- /dev/null +++ b/lld/test/ELF/hexagon-tls-ie.s @@ -0,0 +1,78 @@ +# REQUIRES: hexagon +# RUN: llvm-mc -filetype=obj -triple=hexagon-unknown-elf %s -o %t.o +# RUN: llvm-readobj -r %t.o | FileCheck -check-prefix=RELOC %s +# RUN: ld.lld %t.o -o %t +## shared needs -z notext because of the R_HEX_IE_16/32_X(R_GOT) static +## relocations +# RUN: ld.lld -z notext -shared %t.o -o %t.so +# RUN: llvm-objdump -d --no-show-raw-insn --print-imm-hex %t | FileCheck %s +# RUN: llvm-readobj -x .got %t | FileCheck -check-prefix=GOT %s +# RUN: llvm-objdump -d --no-show-raw-insn --print-imm-hex %t.so | \ +# RUN: FileCheck -check-prefix=SHARED %s +# RUN: llvm-readobj -r %t.so | FileCheck -check-prefix=RELA %s + + .globl _start + .type _start, @function +_start: + +# RELOC: 0x0 R_HEX_IE_32_6_X a 0x0 +# RELOC-NEXT: 0x4 R_HEX_IE_16_X a 0x0 +# CHECK: { immext(#0x30180) +# CHECK-NEXT: r2 = memw(##0x301a4) } + r2 = memw(##a@IE) + +# RELOC-NEXT: 0x8 R_HEX_IE_LO16 a 0x0 +# CHECK: { r2.l = #0x1a4 } + r2.l = #a@IE +# RELOC-NEXT: 0xC R_HEX_IE_HI16 a 0x0 +# CHECK: { r2.h = #0x3 } + r2.h = #a@IE + + +# GOT: Hex dump of section '.got': +# GOT-NEXT: 0x000301a4 f0ffffff f4ffffff f8ffffff fcffffff + r2 = memw(##a@IE) + r2 = memw(##b@IE) + r2 = memw(##c@IE) + r2 = memw(##d@IE) + +# RELOC: 0x30 R_HEX_IE_GOT_32_6_X a 0x0 +# RELOC-NEXT: 0x34 R_HEX_IE_GOT_16_X a 0x0 +# SHARED: { immext(#0xfffeffc0) +# SHARED-NEXT: r2 = memw(##0xfffefff0) } + r2 = memw(##a@IEGOT) + +# RELOC-NEXT: 0x38 R_HEX_IE_GOT_LO16 a 0x0 +# SHARED: { r2.l = #0xfff0 } + r2.l = #a@IEGOT +# RELOC-NEXT: 0x3C R_HEX_IE_GOT_HI16 a 0x0 +# SHARED: { r2.h = #0xfffe } + r2.h = #a@IEGOT + +# RELOC: 0x44 R_HEX_IE_GOT_11_X a 0x0 +# SHARED: { immext(#0xfffeffc0) +# SHARED-NEXT: r0 = !cmp.eq(r1,##-0x10010) } + r0=!cmp.eq(r1,##a@iegot) + +# RELA: 0x203C4 R_HEX_TPREL_32 a 0x0 +# RELA-NEXT: 0x203C8 R_HEX_TPREL_32 b 0x0 +# RELA-NEXT: 0x203CC R_HEX_TPREL_32 c 0x0 +# RELA-NEXT: 0x203D0 R_HEX_TPREL_32 d 0x0 + r2 = memw(##b@IEGOT) + 
r2 = memw(##c@IEGOT) + r2 = memw(##d@IEGOT) + + +.section .tdata,"awT",@progbits +.globl a +a: +.word 1 +.globl b +b: +.word 2 +.globl c +c: +.word 3 +.globl d +d: +.word 4 diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt index 6170ab625c54d..573b8556989e4 100644 --- a/lldb/CMakeLists.txt +++ b/lldb/CMakeLists.txt @@ -56,7 +56,7 @@ if (LLDB_ENABLE_PYTHON) endif () if (LLDB_ENABLE_PYTHON OR LLDB_ENABLE_LUA) - add_subdirectory(scripts) + add_subdirectory(bindings) endif () # We need the headers generated by instrinsics_gen before we can compile @@ -97,7 +97,7 @@ if(LLDB_INCLUDE_TESTS) endif() if (LLDB_ENABLE_PYTHON) - get_target_property(lldb_scripts_dir swig_wrapper BINARY_DIR) + get_target_property(lldb_bindings_dir swig_wrapper BINARY_DIR) if(LLDB_BUILD_FRAMEWORK) set(lldb_python_build_path "${LLDB_FRAMEWORK_ABSOLUTE_BUILD_DIR}/LLDB.framework/Resources/Python/lldb") @@ -109,7 +109,7 @@ if (LLDB_ENABLE_PYTHON) # to liblldb.so for the Python API(hardlink on Windows). add_custom_target(finish_swig ALL VERBATIM COMMAND ${CMAKE_COMMAND} -E make_directory ${lldb_python_build_path} - DEPENDS ${lldb_scripts_dir}/lldb.py + DEPENDS ${lldb_bindings_dir}/lldb.py COMMENT "Python script sym-linking LLDB Python API") if(NOT LLDB_USE_SYSTEM_SIX) @@ -121,7 +121,7 @@ if (LLDB_ENABLE_PYTHON) add_custom_command(TARGET finish_swig POST_BUILD VERBATIM COMMAND ${CMAKE_COMMAND} -E copy - "${lldb_scripts_dir}/lldb.py" + "${lldb_bindings_dir}/lldb.py" "${lldb_python_build_path}/__init__.py") function(create_python_package pkg_dir) @@ -131,7 +131,7 @@ if (LLDB_ENABLE_PYTHON) endif() if(NOT ARG_NOINIT) set(init_cmd COMMAND ${PYTHON_EXECUTABLE} - "${LLDB_SOURCE_DIR}/scripts/Python/createPythonInit.py" + "${LLDB_SOURCE_DIR}/bindings/python/createPythonInit.py" "${pkg_dir}" ${ARG_FILES}) endif() add_custom_command(TARGET finish_swig POST_BUILD VERBATIM diff --git a/lldb/scripts/CMakeLists.txt b/lldb/bindings/CMakeLists.txt similarity index 93% rename from lldb/scripts/CMakeLists.txt rename to lldb/bindings/CMakeLists.txt index 515c63293bc20..92ae402c478e9 100644 --- a/lldb/scripts/CMakeLists.txt +++ b/lldb/bindings/CMakeLists.txt @@ -1,4 +1,4 @@ -file(GLOB SWIG_INTERFACES interface/*.i) +file(GLOB SWIG_INTERFACES interfaces/*.i) file(GLOB_RECURSE SWIG_SOURCES *.swig) file(GLOB SWIG_HEADERS ${LLDB_SOURCE_DIR}/include/lldb/API/*.h @@ -46,7 +46,7 @@ if (LLDB_ENABLE_PYTHON) -python -threads -o ${CMAKE_CURRENT_BINARY_DIR}/LLDBWrapPython.cpp - ${LLDB_SOURCE_DIR}/scripts/lldb.swig + ${LLDB_SOURCE_DIR}/bindings/python.swig VERBATIM COMMENT "Builds LLDB Python wrapper") @@ -67,7 +67,7 @@ if (LLDB_ENABLE_LUA) -lua -w503 -o ${CMAKE_CURRENT_BINARY_DIR}/LLDBWrapLua.cpp - ${LLDB_SOURCE_DIR}/scripts/lldb_lua.swig + ${LLDB_SOURCE_DIR}/bindings/lua.swig VERBATIM COMMENT "Builds LLDB Lua wrapper") diff --git a/lldb/scripts/headers.swig b/lldb/bindings/headers.swig similarity index 100% rename from lldb/scripts/headers.swig rename to lldb/bindings/headers.swig diff --git a/lldb/scripts/interface/SBAddress.i b/lldb/bindings/interface/SBAddress.i similarity index 99% rename from lldb/scripts/interface/SBAddress.i rename to lldb/bindings/interface/SBAddress.i index 6c5352bac6d7c..4658534d153ea 100644 --- a/lldb/scripts/interface/SBAddress.i +++ b/lldb/bindings/interface/SBAddress.i @@ -140,6 +140,8 @@ public: lldb::SBLineEntry GetLineEntry (); + STRING_EXTENSION(SBAddress) + #ifdef SWIGPYTHON %pythoncode %{ def __get_load_addr_property__ (self): diff --git a/lldb/scripts/interface/SBAttachInfo.i 
b/lldb/bindings/interface/SBAttachInfo.i similarity index 100% rename from lldb/scripts/interface/SBAttachInfo.i rename to lldb/bindings/interface/SBAttachInfo.i diff --git a/lldb/scripts/interface/SBBlock.i b/lldb/bindings/interface/SBBlock.i similarity index 99% rename from lldb/scripts/interface/SBBlock.i rename to lldb/bindings/interface/SBBlock.i index 73079a11760c6..8bd8e37953cfc 100644 --- a/lldb/scripts/interface/SBBlock.i +++ b/lldb/bindings/interface/SBBlock.i @@ -100,6 +100,8 @@ public: bool locals, bool statics); + STRING_EXTENSION(SBBlock) + #ifdef SWIGPYTHON %pythoncode %{ def get_range_at_index(self, idx): diff --git a/lldb/scripts/interface/SBBreakpoint.i b/lldb/bindings/interface/SBBreakpoint.i similarity index 99% rename from lldb/scripts/interface/SBBreakpoint.i rename to lldb/bindings/interface/SBBreakpoint.i index f84f2ada3d329..20354346be900 100644 --- a/lldb/scripts/interface/SBBreakpoint.i +++ b/lldb/bindings/interface/SBBreakpoint.i @@ -249,6 +249,8 @@ public: bool IsHardware (); + STRING_EXTENSION(SBBreakpoint) + #ifdef SWIGPYTHON %pythoncode %{ diff --git a/lldb/scripts/interface/SBBreakpointLocation.i b/lldb/bindings/interface/SBBreakpointLocation.i similarity index 97% rename from lldb/scripts/interface/SBBreakpointLocation.i rename to lldb/bindings/interface/SBBreakpointLocation.i index 44fd42b514f7f..dc39c83c2d67b 100644 --- a/lldb/scripts/interface/SBBreakpointLocation.i +++ b/lldb/bindings/interface/SBBreakpointLocation.i @@ -134,6 +134,8 @@ public: SBBreakpoint GetBreakpoint (); + + STRING_EXTENSION_LEVEL(SBBreakpointLocation, lldb::eDescriptionLevelFull) }; } // namespace lldb diff --git a/lldb/scripts/interface/SBBreakpointName.i b/lldb/bindings/interface/SBBreakpointName.i similarity index 98% rename from lldb/scripts/interface/SBBreakpointName.i rename to lldb/bindings/interface/SBBreakpointName.i index 2a06d0a2105f0..e280d42245915 100644 --- a/lldb/scripts/interface/SBBreakpointName.i +++ b/lldb/bindings/interface/SBBreakpointName.i @@ -108,6 +108,7 @@ public: bool GetDescription(lldb::SBStream &description); + STRING_EXTENSION(SBBreakpointName) }; } // namespace lldb diff --git a/lldb/scripts/interface/SBBroadcaster.i b/lldb/bindings/interface/SBBroadcaster.i similarity index 100% rename from lldb/scripts/interface/SBBroadcaster.i rename to lldb/bindings/interface/SBBroadcaster.i diff --git a/lldb/scripts/interface/SBCommandInterpreter.i b/lldb/bindings/interface/SBCommandInterpreter.i similarity index 100% rename from lldb/scripts/interface/SBCommandInterpreter.i rename to lldb/bindings/interface/SBCommandInterpreter.i diff --git a/lldb/scripts/interface/SBCommandReturnObject.i b/lldb/bindings/interface/SBCommandReturnObject.i similarity index 98% rename from lldb/scripts/interface/SBCommandReturnObject.i rename to lldb/bindings/interface/SBCommandReturnObject.i index 73d4001aaba59..affa16520f28d 100644 --- a/lldb/scripts/interface/SBCommandReturnObject.i +++ b/lldb/bindings/interface/SBCommandReturnObject.i @@ -96,6 +96,8 @@ public: void SetImmediateOutputFile(lldb::FileSP BORROWED); void SetImmediateErrorFile(lldb::FileSP BORROWED); + STRING_EXTENSION(SBCommandReturnObject) + %extend { // transfer_ownership does nothing, and is here for compatibility with // old scripts. Ownership is tracked by reference count in the ordinary way. 
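
The STRING_EXTENSION(...) lines threaded through these interface files all expand to the same thing: a SWIG %extend __str__ that renders the object via its GetDescription overload (see the new lldb/bindings/macros.swig later in this patch). Reduced to plain C++, the generated body behaves like the sketch below; describe() and the SBError parameter are illustrative stand-ins for the __str__ that the macro emits on each SB class:

#include <string>
#include "lldb/API/SBError.h"
#include "lldb/API/SBStream.h"

// Stand-in for the __str__ generated by STRING_EXTENSION(SBError).
static std::string describe(lldb::SBError &obj) {
  lldb::SBStream stream;
  obj.GetDescription(stream);   // each SB class provides a GetDescription
  const char *desc = stream.GetData();
  size_t len = stream.GetSize();
  // Trim one trailing newline so Python's print() does not double-space.
  if (len > 0 && (desc[len - 1] == '\n' || desc[len - 1] == '\r'))
    --len;
  return std::string(desc, len);
}
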
diff --git a/lldb/scripts/interface/SBCommunication.i b/lldb/bindings/interface/SBCommunication.i similarity index 100% rename from lldb/scripts/interface/SBCommunication.i rename to lldb/bindings/interface/SBCommunication.i diff --git a/lldb/scripts/interface/SBCompileUnit.i b/lldb/bindings/interface/SBCompileUnit.i similarity index 99% rename from lldb/scripts/interface/SBCompileUnit.i rename to lldb/bindings/interface/SBCompileUnit.i index bc2d45ae8e56c..d6a4c07038c65 100644 --- a/lldb/scripts/interface/SBCompileUnit.i +++ b/lldb/bindings/interface/SBCompileUnit.i @@ -116,6 +116,8 @@ public: bool operator != (const lldb::SBCompileUnit &rhs) const; + STRING_EXTENSION(SBCompileUnit) + #ifdef SWIGPYTHON %pythoncode %{ def __iter__(self): diff --git a/lldb/scripts/interface/SBData.i b/lldb/bindings/interface/SBData.i similarity index 99% rename from lldb/scripts/interface/SBData.i rename to lldb/bindings/interface/SBData.i index fdaa6962f0eca..3e74240329e05 100644 --- a/lldb/scripts/interface/SBData.i +++ b/lldb/bindings/interface/SBData.i @@ -134,6 +134,8 @@ public: bool SetDataFromDoubleArray (double* array, size_t array_len); + STRING_EXTENSION(SBData) + #ifdef SWIGPYTHON %pythoncode %{ diff --git a/lldb/scripts/interface/SBDebugger.i b/lldb/bindings/interface/SBDebugger.i similarity index 99% rename from lldb/scripts/interface/SBDebugger.i rename to lldb/bindings/interface/SBDebugger.i index 52f65841893c6..f2e23a7ed7804 100644 --- a/lldb/scripts/interface/SBDebugger.i +++ b/lldb/bindings/interface/SBDebugger.i @@ -479,6 +479,8 @@ public: lldb::SBTypeSynthetic GetSyntheticForType (lldb::SBTypeNameSpecifier); + STRING_EXTENSION(SBDebugger) + %feature("docstring", "Launch a command interpreter session. Commands are read from standard input or from the input handle specified for the debugger object. 
Output/errors are diff --git a/lldb/scripts/interface/SBDeclaration.i b/lldb/bindings/interface/SBDeclaration.i similarity index 97% rename from lldb/scripts/interface/SBDeclaration.i rename to lldb/bindings/interface/SBDeclaration.i index cdaec85676461..621c1a0ab7c87 100644 --- a/lldb/scripts/interface/SBDeclaration.i +++ b/lldb/bindings/interface/SBDeclaration.i @@ -53,6 +53,8 @@ namespace lldb { bool operator != (const lldb::SBDeclaration &rhs) const; + STRING_EXTENSION(SBDeclaration) + #ifdef SWIGPYTHON %pythoncode %{ file = property(GetFileSpec, None, doc='''A read only property that returns an lldb object that represents the file (lldb.SBFileSpec) for this line entry.''') diff --git a/lldb/scripts/interface/SBError.i b/lldb/bindings/interface/SBError.i similarity index 99% rename from lldb/scripts/interface/SBError.i rename to lldb/bindings/interface/SBError.i index 96cd6c4886f5f..ea48e2263a77a 100644 --- a/lldb/scripts/interface/SBError.i +++ b/lldb/bindings/interface/SBError.i @@ -105,6 +105,8 @@ public: bool GetDescription (lldb::SBStream &description); + STRING_EXTENSION(SBError) + #ifdef SWIGPYTHON %pythoncode %{ value = property(GetError, None, doc='''A read only property that returns the same result as GetError().''') diff --git a/lldb/scripts/interface/SBEvent.i b/lldb/bindings/interface/SBEvent.i similarity index 100% rename from lldb/scripts/interface/SBEvent.i rename to lldb/bindings/interface/SBEvent.i diff --git a/lldb/scripts/interface/SBExecutionContext.i b/lldb/bindings/interface/SBExecutionContext.i similarity index 100% rename from lldb/scripts/interface/SBExecutionContext.i rename to lldb/bindings/interface/SBExecutionContext.i diff --git a/lldb/scripts/interface/SBExpressionOptions.i b/lldb/bindings/interface/SBExpressionOptions.i similarity index 100% rename from lldb/scripts/interface/SBExpressionOptions.i rename to lldb/bindings/interface/SBExpressionOptions.i diff --git a/lldb/scripts/interface/SBFile.i b/lldb/bindings/interface/SBFile.i similarity index 100% rename from lldb/scripts/interface/SBFile.i rename to lldb/bindings/interface/SBFile.i diff --git a/lldb/scripts/interface/SBFileSpec.i b/lldb/bindings/interface/SBFileSpec.i similarity index 98% rename from lldb/scripts/interface/SBFileSpec.i rename to lldb/bindings/interface/SBFileSpec.i index 07a7630ebbac8..d287a940c051a 100644 --- a/lldb/scripts/interface/SBFileSpec.i +++ b/lldb/bindings/interface/SBFileSpec.i @@ -80,6 +80,8 @@ public: void AppendPathComponent (const char *file_or_directory); + STRING_EXTENSION(SBFileSpec) + #ifdef SWIGPYTHON %pythoncode %{ def __get_fullpath__(self): diff --git a/lldb/scripts/interface/SBFileSpecList.i b/lldb/bindings/interface/SBFileSpecList.i similarity index 100% rename from lldb/scripts/interface/SBFileSpecList.i rename to lldb/bindings/interface/SBFileSpecList.i diff --git a/lldb/scripts/interface/SBFrame.i b/lldb/bindings/interface/SBFrame.i similarity index 99% rename from lldb/scripts/interface/SBFrame.i rename to lldb/bindings/interface/SBFrame.i index 811f7f22f9b4d..c65b88f863e7d 100644 --- a/lldb/scripts/interface/SBFrame.i +++ b/lldb/bindings/interface/SBFrame.i @@ -285,6 +285,8 @@ public: bool GetDescription (lldb::SBStream &description); + STRING_EXTENSION(SBFrame) + #ifdef SWIGPYTHON %pythoncode %{ def get_all_variables(self): diff --git a/lldb/scripts/interface/SBFunction.i b/lldb/bindings/interface/SBFunction.i similarity index 99% rename from lldb/scripts/interface/SBFunction.i rename to lldb/bindings/interface/SBFunction.i index 
7b157bb388169..630c4db22c55d 100644 --- a/lldb/scripts/interface/SBFunction.i +++ b/lldb/bindings/interface/SBFunction.i @@ -111,6 +111,8 @@ public: bool operator != (const lldb::SBFunction &rhs) const; + STRING_EXTENSION(SBFunction) + #ifdef SWIGPYTHON %pythoncode %{ def get_instructions_from_current_target (self): diff --git a/lldb/scripts/interface/SBHostOS.i b/lldb/bindings/interface/SBHostOS.i similarity index 100% rename from lldb/scripts/interface/SBHostOS.i rename to lldb/bindings/interface/SBHostOS.i diff --git a/lldb/scripts/interface/SBInstruction.i b/lldb/bindings/interface/SBInstruction.i similarity index 98% rename from lldb/scripts/interface/SBInstruction.i rename to lldb/bindings/interface/SBInstruction.i index 09688214630b1..d50a080fd0454 100644 --- a/lldb/scripts/interface/SBInstruction.i +++ b/lldb/bindings/interface/SBInstruction.i @@ -74,6 +74,8 @@ public: bool TestEmulation (lldb::SBStream &output_stream, const char *test_file); + STRING_EXTENSION(SBInstruction) + #ifdef SWIGPYTHON %pythoncode %{ def __mnemonic_property__ (self): diff --git a/lldb/scripts/interface/SBInstructionList.i b/lldb/bindings/interface/SBInstructionList.i similarity index 98% rename from lldb/scripts/interface/SBInstructionList.i rename to lldb/bindings/interface/SBInstructionList.i index d50deba4f5e1f..1357323027573 100644 --- a/lldb/scripts/interface/SBInstructionList.i +++ b/lldb/bindings/interface/SBInstructionList.i @@ -66,6 +66,8 @@ public: bool DumpEmulationForAllInstructions (const char *triple); + STRING_EXTENSION(SBInstructionList) + #ifdef SWIGPYTHON %pythoncode %{ def __iter__(self): diff --git a/lldb/scripts/interface/SBLanguageRuntime.i b/lldb/bindings/interface/SBLanguageRuntime.i similarity index 100% rename from lldb/scripts/interface/SBLanguageRuntime.i rename to lldb/bindings/interface/SBLanguageRuntime.i diff --git a/lldb/scripts/interface/SBLaunchInfo.i b/lldb/bindings/interface/SBLaunchInfo.i similarity index 100% rename from lldb/scripts/interface/SBLaunchInfo.i rename to lldb/bindings/interface/SBLaunchInfo.i diff --git a/lldb/scripts/interface/SBLineEntry.i b/lldb/bindings/interface/SBLineEntry.i similarity index 98% rename from lldb/scripts/interface/SBLineEntry.i rename to lldb/bindings/interface/SBLineEntry.i index 90f60df23247d..be365377ba8b0 100644 --- a/lldb/scripts/interface/SBLineEntry.i +++ b/lldb/bindings/interface/SBLineEntry.i @@ -84,6 +84,8 @@ public: bool operator != (const lldb::SBLineEntry &rhs) const; + STRING_EXTENSION(SBLineEntry) + #ifdef SWIGPYTHON %pythoncode %{ file = property(GetFileSpec, None, doc='''A read only property that returns an lldb object that represents the file (lldb.SBFileSpec) for this line entry.''') diff --git a/lldb/scripts/interface/SBListener.i b/lldb/bindings/interface/SBListener.i similarity index 100% rename from lldb/scripts/interface/SBListener.i rename to lldb/bindings/interface/SBListener.i diff --git a/lldb/scripts/interface/SBMemoryRegionInfo.i b/lldb/bindings/interface/SBMemoryRegionInfo.i similarity index 96% rename from lldb/scripts/interface/SBMemoryRegionInfo.i rename to lldb/bindings/interface/SBMemoryRegionInfo.i index 7a59d0051ceac..6a2ad6a3e3649 100644 --- a/lldb/scripts/interface/SBMemoryRegionInfo.i +++ b/lldb/bindings/interface/SBMemoryRegionInfo.i @@ -55,6 +55,7 @@ public: bool GetDescription (lldb::SBStream &description); + STRING_EXTENSION(SBMemoryRegionInfo) }; } // namespace lldb diff --git a/lldb/scripts/interface/SBMemoryRegionInfoList.i b/lldb/bindings/interface/SBMemoryRegionInfoList.i similarity 
index 100% rename from lldb/scripts/interface/SBMemoryRegionInfoList.i rename to lldb/bindings/interface/SBMemoryRegionInfoList.i diff --git a/lldb/scripts/interface/SBModule.i b/lldb/bindings/interface/SBModule.i similarity index 99% rename from lldb/scripts/interface/SBModule.i rename to lldb/bindings/interface/SBModule.i index 03c8aeb2bed9e..a9d9480cd7cf1 100644 --- a/lldb/scripts/interface/SBModule.i +++ b/lldb/bindings/interface/SBModule.i @@ -344,6 +344,8 @@ public: lldb::SBAddress GetObjectFileEntryPointAddress() const; + STRING_EXTENSION(SBModule) + #ifdef SWIGPYTHON %pythoncode %{ def __len__(self): diff --git a/lldb/scripts/interface/SBModuleSpec.i b/lldb/bindings/interface/SBModuleSpec.i similarity index 97% rename from lldb/scripts/interface/SBModuleSpec.i rename to lldb/bindings/interface/SBModuleSpec.i index ec4e9bb7fbf72..64d0aa641a774 100644 --- a/lldb/scripts/interface/SBModuleSpec.i +++ b/lldb/bindings/interface/SBModuleSpec.i @@ -91,6 +91,7 @@ public: bool GetDescription (lldb::SBStream &description); + STRING_EXTENSION(SBModuleSpec) }; @@ -127,6 +128,7 @@ public: bool GetDescription (lldb::SBStream &description); + STRING_EXTENSION(SBModuleSpecList) }; } // namespace lldb diff --git a/lldb/scripts/interface/SBPlatform.i b/lldb/bindings/interface/SBPlatform.i similarity index 100% rename from lldb/scripts/interface/SBPlatform.i rename to lldb/bindings/interface/SBPlatform.i diff --git a/lldb/scripts/interface/SBProcess.i b/lldb/bindings/interface/SBProcess.i similarity index 99% rename from lldb/scripts/interface/SBProcess.i rename to lldb/bindings/interface/SBProcess.i index c5ebc24686155..ac6a265faec9f 100644 --- a/lldb/scripts/interface/SBProcess.i +++ b/lldb/bindings/interface/SBProcess.i @@ -417,6 +417,8 @@ public: lldb::SBProcessInfo GetProcessInfo(); + STRING_EXTENSION(SBProcess) + #ifdef SWIGPYTHON %pythoncode %{ def __get_is_alive__(self): diff --git a/lldb/scripts/interface/SBProcessInfo.i b/lldb/bindings/interface/SBProcessInfo.i similarity index 100% rename from lldb/scripts/interface/SBProcessInfo.i rename to lldb/bindings/interface/SBProcessInfo.i diff --git a/lldb/scripts/interface/SBQueue.i b/lldb/bindings/interface/SBQueue.i similarity index 100% rename from lldb/scripts/interface/SBQueue.i rename to lldb/bindings/interface/SBQueue.i diff --git a/lldb/scripts/interface/SBQueueItem.i b/lldb/bindings/interface/SBQueueItem.i similarity index 100% rename from lldb/scripts/interface/SBQueueItem.i rename to lldb/bindings/interface/SBQueueItem.i diff --git a/lldb/scripts/interface/SBSection.i b/lldb/bindings/interface/SBSection.i similarity index 99% rename from lldb/scripts/interface/SBSection.i rename to lldb/bindings/interface/SBSection.i index c1a84acc4f144..3d1c900917fd8 100644 --- a/lldb/scripts/interface/SBSection.i +++ b/lldb/bindings/interface/SBSection.i @@ -114,6 +114,8 @@ public: bool operator != (const lldb::SBSection &rhs); + STRING_EXTENSION(SBSection) + #ifdef SWIGPYTHON %pythoncode %{ def __iter__(self): diff --git a/lldb/scripts/interface/SBSourceManager.i b/lldb/bindings/interface/SBSourceManager.i similarity index 100% rename from lldb/scripts/interface/SBSourceManager.i rename to lldb/bindings/interface/SBSourceManager.i diff --git a/lldb/scripts/interface/SBStream.i b/lldb/bindings/interface/SBStream.i similarity index 100% rename from lldb/scripts/interface/SBStream.i rename to lldb/bindings/interface/SBStream.i diff --git a/lldb/scripts/interface/SBStringList.i b/lldb/bindings/interface/SBStringList.i similarity index 100% rename from 
lldb/scripts/interface/SBStringList.i rename to lldb/bindings/interface/SBStringList.i diff --git a/lldb/scripts/interface/SBStructuredData.i b/lldb/bindings/interface/SBStructuredData.i similarity index 100% rename from lldb/scripts/interface/SBStructuredData.i rename to lldb/bindings/interface/SBStructuredData.i diff --git a/lldb/scripts/interface/SBSymbol.i b/lldb/bindings/interface/SBSymbol.i similarity index 99% rename from lldb/scripts/interface/SBSymbol.i rename to lldb/bindings/interface/SBSymbol.i index e5880e66d300f..4e17ab5af0fd6 100644 --- a/lldb/scripts/interface/SBSymbol.i +++ b/lldb/bindings/interface/SBSymbol.i @@ -72,6 +72,8 @@ public: bool operator != (const lldb::SBSymbol &rhs) const; + STRING_EXTENSION(SBSymbol) + #ifdef SWIGPYTHON %pythoncode %{ def get_instructions_from_current_target (self): diff --git a/lldb/scripts/interface/SBSymbolContext.i b/lldb/bindings/interface/SBSymbolContext.i similarity index 99% rename from lldb/scripts/interface/SBSymbolContext.i rename to lldb/bindings/interface/SBSymbolContext.i index a6aa4d78bfe39..b6b336516c949 100644 --- a/lldb/scripts/interface/SBSymbolContext.i +++ b/lldb/bindings/interface/SBSymbolContext.i @@ -81,6 +81,7 @@ public: bool GetDescription (lldb::SBStream &description); + STRING_EXTENSION(SBSymbolContext) #ifdef SWIGPYTHON %pythoncode %{ diff --git a/lldb/scripts/interface/SBSymbolContextList.i b/lldb/bindings/interface/SBSymbolContextList.i similarity index 99% rename from lldb/scripts/interface/SBSymbolContextList.i rename to lldb/bindings/interface/SBSymbolContextList.i index 54adc659fa212..f5adcfcebfb56 100644 --- a/lldb/scripts/interface/SBSymbolContextList.i +++ b/lldb/bindings/interface/SBSymbolContextList.i @@ -60,6 +60,8 @@ public: void Clear(); + STRING_EXTENSION(SBSymbolContextList) + #ifdef SWIGPYTHON %pythoncode %{ def __iter__(self): diff --git a/lldb/scripts/interface/SBTarget.i b/lldb/bindings/interface/SBTarget.i similarity index 98% rename from lldb/scripts/interface/SBTarget.i rename to lldb/bindings/interface/SBTarget.i index 02c70b6e1cd6f..371bf5c35ebd0 100644 --- a/lldb/scripts/interface/SBTarget.i +++ b/lldb/bindings/interface/SBTarget.i @@ -967,21 +967,7 @@ public: lldb::SBValue EvaluateExpression (const char *expr, const lldb::SBExpressionOptions &options); - %extend { - %nothreadallow; - std::string lldb::SBTarget::__str__(){ - lldb::SBStream stream; - $self->GetDescription (stream, lldb::eDescriptionLevelBrief); - - const char *desc = stream.GetData(); - size_t desc_len = stream.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - - return std::string(desc, desc_len); - } - %clearnothreadallow; - } + STRING_EXTENSION_LEVEL(SBTarget, lldb::eDescriptionLevelBrief) #ifdef SWIGPYTHON %pythoncode %{ diff --git a/lldb/scripts/interface/SBThread.i b/lldb/bindings/interface/SBThread.i similarity index 99% rename from lldb/scripts/interface/SBThread.i rename to lldb/bindings/interface/SBThread.i index c1c045487fc1d..95b15b182ec26 100644 --- a/lldb/scripts/interface/SBThread.i +++ b/lldb/bindings/interface/SBThread.i @@ -402,6 +402,8 @@ public: bool SafeToCallFunctions (); + STRING_EXTENSION(SBThread) + #ifdef SWIGPYTHON %pythoncode %{ def __iter__(self): diff --git a/lldb/scripts/interface/SBThreadCollection.i b/lldb/bindings/interface/SBThreadCollection.i similarity index 100% rename from lldb/scripts/interface/SBThreadCollection.i rename to lldb/bindings/interface/SBThreadCollection.i diff --git a/lldb/scripts/interface/SBThreadPlan.i 
b/lldb/bindings/interface/SBThreadPlan.i similarity index 100% rename from lldb/scripts/interface/SBThreadPlan.i rename to lldb/bindings/interface/SBThreadPlan.i diff --git a/lldb/scripts/interface/SBTrace.i b/lldb/bindings/interface/SBTrace.i similarity index 100% rename from lldb/scripts/interface/SBTrace.i rename to lldb/bindings/interface/SBTrace.i diff --git a/lldb/scripts/interface/SBTraceOptions.i b/lldb/bindings/interface/SBTraceOptions.i similarity index 100% rename from lldb/scripts/interface/SBTraceOptions.i rename to lldb/bindings/interface/SBTraceOptions.i diff --git a/lldb/scripts/interface/SBType.i b/lldb/bindings/interface/SBType.i similarity index 98% rename from lldb/scripts/interface/SBType.i rename to lldb/bindings/interface/SBType.i index d9da9e39b9560..3cd82452084b4 100644 --- a/lldb/scripts/interface/SBType.i +++ b/lldb/bindings/interface/SBType.i @@ -43,6 +43,8 @@ public: uint32_t GetBitfieldSizeInBits(); + STRING_EXTENSION_LEVEL(SBTypeMember, lldb::eDescriptionLevelBrief) + #ifdef SWIGPYTHON %pythoncode %{ name = property(GetName, None, doc='''A read only property that returns the name for this member as a string.''') @@ -100,6 +102,7 @@ public: GetDescription (lldb::SBStream &description, lldb::DescriptionLevel description_level); + STRING_EXTENSION_LEVEL(SBTypeMemberFunction, lldb::eDescriptionLevelBrief) protected: lldb::TypeMemberFunctionImplSP m_opaque_sp; }; @@ -314,6 +317,8 @@ public: bool operator!=(lldb::SBType &rhs); + STRING_EXTENSION_LEVEL(SBType, lldb::eDescriptionLevelBrief) + #ifdef SWIGPYTHON %pythoncode %{ def template_arg_array(self): diff --git a/lldb/scripts/interface/SBTypeCategory.i b/lldb/bindings/interface/SBTypeCategory.i similarity index 99% rename from lldb/scripts/interface/SBTypeCategory.i rename to lldb/bindings/interface/SBTypeCategory.i index 43fe9faf70f52..b762bf8a95a36 100644 --- a/lldb/scripts/interface/SBTypeCategory.i +++ b/lldb/bindings/interface/SBTypeCategory.i @@ -124,6 +124,8 @@ namespace lldb { bool DeleteTypeSynthetic (lldb::SBTypeNameSpecifier); + STRING_EXTENSION_LEVEL(SBTypeCategory, lldb::eDescriptionLevelBrief) + #ifdef SWIGPYTHON %pythoncode %{ diff --git a/lldb/scripts/interface/SBTypeEnumMember.i b/lldb/bindings/interface/SBTypeEnumMember.i similarity index 97% rename from lldb/scripts/interface/SBTypeEnumMember.i rename to lldb/bindings/interface/SBTypeEnumMember.i index b2d8617117823..006bdeaa8cee1 100644 --- a/lldb/scripts/interface/SBTypeEnumMember.i +++ b/lldb/bindings/interface/SBTypeEnumMember.i @@ -43,6 +43,7 @@ public: GetDescription (lldb::SBStream &description, lldb::DescriptionLevel description_level); + STRING_EXTENSION_LEVEL(SBTypeEnumMember, lldb::eDescriptionLevelBrief) #ifdef SWIGPYTHON %pythoncode %{ name = property(GetName, None, doc='''A read only property that returns the name for this enum member as a string.''') diff --git a/lldb/scripts/interface/SBTypeFilter.i b/lldb/bindings/interface/SBTypeFilter.i similarity index 95% rename from lldb/scripts/interface/SBTypeFilter.i rename to lldb/bindings/interface/SBTypeFilter.i index 3759e0a23d418..c1d282c6d4fbe 100644 --- a/lldb/scripts/interface/SBTypeFilter.i +++ b/lldb/bindings/interface/SBTypeFilter.i @@ -61,6 +61,8 @@ namespace lldb { bool operator != (lldb::SBTypeFilter &rhs); + STRING_EXTENSION_LEVEL(SBTypeFilter, lldb::eDescriptionLevelBrief) + #ifdef SWIGPYTHON %pythoncode %{ options = property(GetOptions, SetOptions) diff --git a/lldb/scripts/interface/SBTypeFormat.i b/lldb/bindings/interface/SBTypeFormat.i similarity index 95% rename 
from lldb/scripts/interface/SBTypeFormat.i rename to lldb/bindings/interface/SBTypeFormat.i index 5efd135b73261..765a2a7bb99dc 100644 --- a/lldb/scripts/interface/SBTypeFormat.i +++ b/lldb/bindings/interface/SBTypeFormat.i @@ -61,6 +61,8 @@ namespace lldb { bool operator != (lldb::SBTypeFormat &rhs); + STRING_EXTENSION_LEVEL(SBTypeFormat, lldb::eDescriptionLevelBrief) + #ifdef SWIGPYTHON %pythoncode %{ format = property(GetFormat, SetFormat) diff --git a/lldb/scripts/interface/SBTypeNameSpecifier.i b/lldb/bindings/interface/SBTypeNameSpecifier.i similarity index 94% rename from lldb/scripts/interface/SBTypeNameSpecifier.i rename to lldb/bindings/interface/SBTypeNameSpecifier.i index bb16e86b0bc50..772f7c174093f 100644 --- a/lldb/scripts/interface/SBTypeNameSpecifier.i +++ b/lldb/bindings/interface/SBTypeNameSpecifier.i @@ -53,6 +53,8 @@ namespace lldb { bool operator != (lldb::SBTypeNameSpecifier &rhs); + STRING_EXTENSION_LEVEL(SBTypeNameSpecifier, lldb::eDescriptionLevelBrief) + #ifdef SWIGPYTHON %pythoncode %{ name = property(GetName) diff --git a/lldb/scripts/interface/SBTypeSummary.i b/lldb/bindings/interface/SBTypeSummary.i similarity index 97% rename from lldb/scripts/interface/SBTypeSummary.i rename to lldb/bindings/interface/SBTypeSummary.i index 225a404cf73c4..adcc79b5a6ee8 100644 --- a/lldb/scripts/interface/SBTypeSummary.i +++ b/lldb/bindings/interface/SBTypeSummary.i @@ -101,6 +101,8 @@ namespace lldb { bool operator != (lldb::SBTypeSummary &rhs); + STRING_EXTENSION_LEVEL(SBTypeSummary, lldb::eDescriptionLevelBrief) + #ifdef SWIGPYTHON %pythoncode %{ options = property(GetOptions, SetOptions) diff --git a/lldb/scripts/interface/SBTypeSynthetic.i b/lldb/bindings/interface/SBTypeSynthetic.i similarity index 95% rename from lldb/scripts/interface/SBTypeSynthetic.i rename to lldb/bindings/interface/SBTypeSynthetic.i index d9d75e4c9efae..f57139ebf9f17 100644 --- a/lldb/scripts/interface/SBTypeSynthetic.i +++ b/lldb/bindings/interface/SBTypeSynthetic.i @@ -63,6 +63,8 @@ namespace lldb { bool operator != (lldb::SBTypeSynthetic &rhs); + STRING_EXTENSION_LEVEL(SBTypeSynthetic, lldb::eDescriptionLevelBrief) + #ifdef SWIGPYTHON %pythoncode %{ options = property(GetOptions, SetOptions) diff --git a/lldb/scripts/interface/SBUnixSignals.i b/lldb/bindings/interface/SBUnixSignals.i similarity index 100% rename from lldb/scripts/interface/SBUnixSignals.i rename to lldb/bindings/interface/SBUnixSignals.i diff --git a/lldb/scripts/interface/SBValue.i b/lldb/bindings/interface/SBValue.i similarity index 99% rename from lldb/scripts/interface/SBValue.i rename to lldb/bindings/interface/SBValue.i index 8647854e89c15..fb899805c3951 100644 --- a/lldb/scripts/interface/SBValue.i +++ b/lldb/bindings/interface/SBValue.i @@ -440,6 +440,8 @@ public: const SBExpressionOptions &options, const char *name) const; + STRING_EXTENSION(SBValue) + #ifdef SWIGPYTHON %pythoncode %{ def __get_dynamic__ (self): diff --git a/lldb/scripts/interface/SBValueList.i b/lldb/bindings/interface/SBValueList.i similarity index 85% rename from lldb/scripts/interface/SBValueList.i rename to lldb/bindings/interface/SBValueList.i index 56ef19054e4c5..17ba2056f0c23 100644 --- a/lldb/scripts/interface/SBValueList.i +++ b/lldb/bindings/interface/SBValueList.i @@ -101,6 +101,29 @@ public: lldb::SBValue GetFirstValueByName (const char* name) const; + %extend { + %nothreadallow; + std::string lldb::SBValueList::__str__ (){ + lldb::SBStream description; + const size_t n = $self->GetSize(); + if (n) + { + for (size_t i=0; 
i<n; i++)
+              $self->GetValueAtIndex(i).GetDescription(description);
+      }
+      else
+      {
+          description.Printf("<empty> lldb.SBValueList()");
+      }
+      const char *desc = description.GetData();
+      size_t desc_len = description.GetSize();
+      if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r'))
+          --desc_len;
+      return std::string(desc, desc_len);
+   }
+   %clearnothreadallow;
+   }
+
 #ifdef SWIGPYTHON
 %pythoncode %{
     def __iter__(self):
diff --git a/lldb/scripts/interface/SBVariablesOptions.i b/lldb/bindings/interface/SBVariablesOptions.i
similarity index 100%
rename from lldb/scripts/interface/SBVariablesOptions.i
rename to lldb/bindings/interface/SBVariablesOptions.i
diff --git a/lldb/scripts/interface/SBWatchpoint.i b/lldb/bindings/interface/SBWatchpoint.i
similarity index 96%
rename from lldb/scripts/interface/SBWatchpoint.i
rename to lldb/bindings/interface/SBWatchpoint.i
index e11c4f213ca2e..cb0bc5f9859ac 100644
--- a/lldb/scripts/interface/SBWatchpoint.i
+++ b/lldb/bindings/interface/SBWatchpoint.i
@@ -90,6 +90,7 @@ public:
     static lldb::SBWatchpoint
     GetWatchpointFromEvent (const lldb::SBEvent& event);

+    STRING_EXTENSION_LEVEL(SBWatchpoint, lldb::eDescriptionLevelVerbose)
 };

 } // namespace lldb
diff --git a/lldb/scripts/interfaces.swig b/lldb/bindings/interfaces.swig
similarity index 99%
rename from lldb/scripts/interfaces.swig
rename to lldb/bindings/interfaces.swig
index cc6bb91febdac..780fe34392ff5 100644
--- a/lldb/scripts/interfaces.swig
+++ b/lldb/bindings/interfaces.swig
@@ -32,8 +32,8 @@
 %include "./interface/SBEvent.i"
 %include "./interface/SBExecutionContext.i"
 %include "./interface/SBExpressionOptions.i"
-%include "./interface/SBFileSpec.i"
 %include "./interface/SBFile.i"
+%include "./interface/SBFileSpec.i"
 %include "./interface/SBFileSpecList.i"
 %include "./interface/SBFrame.i"
 %include "./interface/SBFunction.i"
@@ -75,9 +75,8 @@
 %include "./interface/SBTypeNameSpecifier.i"
 %include "./interface/SBTypeSummary.i"
 %include "./interface/SBTypeSynthetic.i"
+%include "./interface/SBUnixSignals.i"
 %include "./interface/SBValue.i"
 %include "./interface/SBValueList.i"
 %include "./interface/SBVariablesOptions.i"
 %include "./interface/SBWatchpoint.i"
-%include "./interface/SBUnixSignals.i"
-
diff --git a/lldb/scripts/lldb_lua.swig b/lldb/bindings/lua.swig
similarity index 93%
rename from lldb/scripts/lldb_lua.swig
rename to lldb/bindings/lua.swig
index bf8809015d9a3..3b279a6b69e7f 100644
--- a/lldb/scripts/lldb_lua.swig
+++ b/lldb/bindings/lua.swig
@@ -9,6 +9,7 @@
 %module lldb

 %include <std_string.i>
+%include "./macros.swig"
 %include "./headers.swig"

 %{
diff --git a/lldb/bindings/macros.swig b/lldb/bindings/macros.swig
new file mode 100644
index 0000000000000..0387f27f3cb9b
--- /dev/null
+++ b/lldb/bindings/macros.swig
@@ -0,0 +1,33 @@
+%define STRING_EXTENSION_LEVEL(Class, Level)
+%extend {
+  %nothreadallow;
+  std::string lldb:: ## Class ## ::__str__(){
+    lldb::SBStream stream;
+    $self->GetDescription (stream, Level);
+    const char *desc = stream.GetData();
+    size_t desc_len = stream.GetSize();
+    if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) {
+      --desc_len;
+    }
+    return std::string(desc, desc_len);
+  }
+  %clearnothreadallow;
+}
+%enddef
+
+%define STRING_EXTENSION(Class)
+%extend {
+  %nothreadallow;
+  std::string lldb:: ## Class ## ::__str__(){
+    lldb::SBStream stream;
+    $self->GetDescription (stream);
+    const char *desc = stream.GetData();
+    size_t desc_len = stream.GetSize();
+    if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) {
+      --desc_len;
+    }
+    return 
std::string(desc, desc_len); + } + %clearnothreadallow; +} +%enddef diff --git a/lldb/scripts/lldb.swig b/lldb/bindings/python.swig similarity index 86% rename from lldb/scripts/lldb.swig rename to lldb/bindings/python.swig index c3b9083327410..56fab9ff17951 100644 --- a/lldb/scripts/lldb.swig +++ b/lldb/bindings/python.swig @@ -59,6 +59,23 @@ except ImportError: // Parameter types will be used in the autodoc string. %feature("autodoc", "1"); +%define ARRAYHELPER(type,name) +%inline %{ +type *new_ ## name (int nitems) { + return (type *) malloc(sizeof(type)*nitems); +} +void delete_ ## name(type *t) { + free(t); +} +type name ## _get(type *t, int index) { + return t[index]; +} +void name ## _set(type *t, int index, type val) { + t[index] = val; +} +%} +%enddef + %pythoncode%{ import uuid import re @@ -94,20 +111,21 @@ def lldb_iter(obj, getsize, getelem): %} %include -%include "./Python/python-typemaps.swig" +%include "./python/python-typemaps.swig" +%include "./macros.swig" %include "./headers.swig" %{ -#include "../source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h" -#include "../scripts/Python/python-swigsafecast.swig" +#include "../source/Plugins/ScriptInterpreter/python/PythonDataObjects.h" +#include "../bindings/python/python-swigsafecast.swig" using namespace lldb_private; using namespace lldb_private::python; using namespace lldb; %} %include "./interfaces.swig" -%include "./Python/python-extensions.swig" -%include "./Python/python-wrapper.swig" +%include "./python/python-extensions.swig" +%include "./python/python-wrapper.swig" %pythoncode%{ debugger_unique_id = 0 diff --git a/lldb/scripts/Python/createPythonInit.py b/lldb/bindings/python/createPythonInit.py similarity index 100% rename from lldb/scripts/Python/createPythonInit.py rename to lldb/bindings/python/createPythonInit.py diff --git a/lldb/bindings/python/python-extensions.swig b/lldb/bindings/python/python-extensions.swig new file mode 100644 index 0000000000000..0b23fdd400068 --- /dev/null +++ b/lldb/bindings/python/python-extensions.swig @@ -0,0 +1,592 @@ +%extend lldb::SBBreakpoint { + %pythoncode %{ + def __eq__(self, rhs): + if not isinstance(rhs, type(self)): + return False + + return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) + + def __ne__(self, rhs): + if not isinstance(rhs, type(self)): + return True + + return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) + %} +} + +%extend lldb::SBBroadcaster { + %pythoncode %{ + def __eq__(self, rhs): + if not isinstance(rhs, type(self)): + return False + + return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) + + def __ne__(self, rhs): + if not isinstance(rhs, type(self)): + return True + + return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) + %} +} + +%extend lldb::SBCommandReturnObject { + /* the write() and flush() calls are not part of the SB API proper, and are solely for Python usage + they are meant to make an SBCommandReturnObject into a file-like object so that instructions of the sort + print >>sb_command_return_object, "something" + will work correctly */ + + void lldb::SBCommandReturnObject::write (const char* str) + { + if (str) + $self->Printf("%s",str); + } + void lldb::SBCommandReturnObject::flush () + {} +} + +%extend lldb::SBCompileUnit { + %pythoncode %{ + def __eq__(self, rhs): + if not isinstance(rhs, type(self)): + return False + + return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) + + def __ne__(self, rhs): + if not isinstance(rhs, type(self)): + return True + + return 
getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) + %} +} + +%extend lldb::SBDeclaration { + %pythoncode %{ + def __eq__(self, rhs): + if not isinstance(rhs, type(self)): + return False + + return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) + + def __ne__(self, rhs): + if not isinstance(rhs, type(self)): + return True + + return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) + %} +} + +%extend lldb::SBFunction { + %pythoncode %{ + def __eq__(self, rhs): + if not isinstance(rhs, type(self)): + return False + + return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) + + def __ne__(self, rhs): + if not isinstance(rhs, type(self)): + return True + + return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) + %} +} + +%extend lldb::SBLineEntry { + %pythoncode %{ + def __eq__(self, rhs): + if not isinstance(rhs, type(self)): + return False + + return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) + + def __ne__(self, rhs): + if not isinstance(rhs, type(self)): + return True + + return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) + %} +} + +%extend lldb::SBModule { + %pythoncode %{ + def __eq__(self, rhs): + if not isinstance(rhs, type(self)): + return False + + return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) + + def __ne__(self, rhs): + if not isinstance(rhs, type(self)): + return True + + return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) + %} +} + +%extend lldb::SBSection { + %pythoncode %{ + def __eq__(self, rhs): + if not isinstance(rhs, type(self)): + return False + + return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) + + def __ne__(self, rhs): + if not isinstance(rhs, type(self)): + return True + + return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) + %} +} +%extend lldb::SBStream { + /* the write() and flush() calls are not part of the SB API proper, and are solely for Python usage + they are meant to make an SBStream into a file-like object so that instructions of the sort + print >>sb_stream, "something" + will work correctly */ + + void lldb::SBStream::write (const char* str) + { + if (str) + $self->Printf("%s",str); + } + void lldb::SBStream::flush () + {} +} +%extend lldb::SBSymbol { + %pythoncode %{ + def __eq__(self, rhs): + if not isinstance(rhs, type(self)): + return False + + return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) + + def __ne__(self, rhs): + if not isinstance(rhs, type(self)): + return True + + return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) + %} +} + +%extend lldb::SBTarget { + %pythoncode %{ + def __eq__(self, rhs): + if not isinstance(rhs, type(self)): + return False + + return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) + + def __ne__(self, rhs): + if not isinstance(rhs, type(self)): + return True + + return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) + %} +} + +%extend lldb::SBTypeFilter { + %pythoncode %{ + def __eq__(self, rhs): + if not isinstance(rhs, type(self)): + return False + + return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) + + def __ne__(self, rhs): + if not isinstance(rhs, type(self)): + return True + + return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) + %} +} + +%extend lldb::SBTypeNameSpecifier { + %pythoncode %{ + def __eq__(self, rhs): + if not isinstance(rhs, type(self)): + return False + + return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) + + def __ne__(self, rhs): + if not 
isinstance(rhs, type(self)): + return True + + return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) + %} +} + +%extend lldb::SBTypeSummary { + %pythoncode %{ + def __eq__(self, rhs): + if not isinstance(rhs, type(self)): + return False + + return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) + + def __ne__(self, rhs): + if not isinstance(rhs, type(self)): + return True + + return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) + %} +} + +%extend lldb::SBTypeSynthetic { + %pythoncode %{ + def __eq__(self, rhs): + if not isinstance(rhs, type(self)): + return False + + return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) + + def __ne__(self, rhs): + if not isinstance(rhs, type(self)): + return True + + return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) + %} +} + +%extend lldb::SBThread { + %pythoncode %{ + def __eq__(self, rhs): + if not isinstance(rhs, type(self)): + return False + + return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) + + def __ne__(self, rhs): + if not isinstance(rhs, type(self)): + return True + + return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) + %} +} + +%pythoncode %{ + +def command(command_name=None, doc=None): + import lldb + """A decorator function that registers an LLDB command line + command that is bound to the function it is attached to.""" + def callable(function): + """Registers an lldb command for the decorated function.""" + command = "command script add -f %s.%s %s" % (function.__module__, function.__name__, command_name or function.__name__) + lldb.debugger.HandleCommand(command) + if doc: + function.__doc__ = doc + return function + + return callable + +class declaration(object): + '''A class that represents a source declaration location with file, line and column.''' + def __init__(self, file, line, col): + self.file = file + self.line = line + self.col = col + +class value_iter(object): + def __iter__(self): + return self + + def __next__(self): + if self.index >= self.length: + raise StopIteration() + child_sbvalue = self.sbvalue.GetChildAtIndex(self.index) + self.index += 1 + return value(child_sbvalue) + + def next(self): + return self.__next__() + + def __init__(self,value): + self.index = 0 + self.sbvalue = value + if type(self.sbvalue) is value: + self.sbvalue = self.sbvalue.sbvalue + self.length = self.sbvalue.GetNumChildren() + +class value(object): + '''A class designed to wrap lldb.SBValue() objects so the resulting object + can be used as a variable would be in code. So if you have a Point structure + variable in your code in the current frame named "pt", you can initialize an instance + of this class with it: + + pt = lldb.value(lldb.frame.FindVariable("pt")) + print pt + print pt.x + print pt.y + + pt = lldb.value(lldb.frame.FindVariable("rectangle_array")) + print rectangle_array[12] + print rectangle_array[5].origin.x''' + def __init__(self, sbvalue): + self.sbvalue = sbvalue + + def __nonzero__(self): + return self.sbvalue.__nonzero__() + + def __bool__(self): + return self.sbvalue.__bool__() + + def __str__(self): + return self.sbvalue.__str__() + + def __getitem__(self, key): + # Allow array access if this value has children... 
+ +class declaration(object): + '''A class that represents a source declaration location with file, line and column.''' + def __init__(self, file, line, col): + self.file = file + self.line = line + self.col = col + +class value_iter(object): + def __iter__(self): + return self + + def __next__(self): + if self.index >= self.length: + raise StopIteration() + child_sbvalue = self.sbvalue.GetChildAtIndex(self.index) + self.index += 1 + return value(child_sbvalue) + + def next(self): + return self.__next__() + + def __init__(self,value): + self.index = 0 + self.sbvalue = value + if type(self.sbvalue) is value: + self.sbvalue = self.sbvalue.sbvalue + self.length = self.sbvalue.GetNumChildren() + +class value(object): + '''A class designed to wrap lldb.SBValue() objects so the resulting object + can be used as a variable would be in code. So if you have a Point structure + variable in your code in the current frame named "pt", you can initialize an instance + of this class with it: + + pt = lldb.value(lldb.frame.FindVariable("pt")) + print pt + print pt.x + print pt.y + + pt = lldb.value(lldb.frame.FindVariable("rectangle_array")) + print pt[12] + print pt[5].origin.x''' + def __init__(self, sbvalue): + self.sbvalue = sbvalue + + def __nonzero__(self): + return self.sbvalue.__nonzero__() + + def __bool__(self): + return self.sbvalue.__bool__() + + def __str__(self): + return self.sbvalue.__str__() + + def __getitem__(self, key): + # Allow array access if this value has children... + if type(key) is value: + key = int(key) + if type(key) is int: + child_sbvalue = (self.sbvalue.GetValueForExpressionPath("[%i]" % key)) + if child_sbvalue and child_sbvalue.IsValid(): + return value(child_sbvalue) + raise IndexError("Index '%d' is out of range" % key) + raise TypeError("No array item of type %s" % str(type(key))) + + def __iter__(self): + return value_iter(self.sbvalue) + + def __getattr__(self, name): + child_sbvalue = self.sbvalue.GetChildMemberWithName (name) + if child_sbvalue and child_sbvalue.IsValid(): + return value(child_sbvalue) + raise AttributeError("Attribute '%s' is not defined" % name) + + def __add__(self, other): + return int(self) + int(other) + + def __sub__(self, other): + return int(self) - int(other) + + def __mul__(self, other): + return int(self) * int(other) + + def __floordiv__(self, other): + return int(self) // int(other) + + def __mod__(self, other): + return int(self) % int(other) + + def __divmod__(self, other): + return divmod(int(self), int(other)) + + def __pow__(self, other): + return int(self) ** int(other) + + def __lshift__(self, other): + return int(self) << int(other) + + def __rshift__(self, other): + return int(self) >> int(other) + + def __and__(self, other): + return int(self) & int(other) + + def __xor__(self, other): + return int(self) ^ int(other) + + def __or__(self, other): + return int(self) | int(other) + + def __div__(self, other): + return int(self) / int(other) + + def __truediv__(self, other): + return int(self) / int(other) + + def __iadd__(self, other): + result = self.__add__(other) + self.sbvalue.SetValueFromCString (str(result)) + return result + + def __isub__(self, other): + result = self.__sub__(other) + self.sbvalue.SetValueFromCString (str(result)) + return result + + def __imul__(self, other): + result = self.__mul__(other) + self.sbvalue.SetValueFromCString (str(result)) + return result + + def __idiv__(self, other): + result = self.__div__(other) + self.sbvalue.SetValueFromCString (str(result)) + return result + + def __itruediv__(self, other): + result = self.__truediv__(other) + self.sbvalue.SetValueFromCString (str(result)) + return result + + def __ifloordiv__(self, other): + result = self.__floordiv__(other) + self.sbvalue.SetValueFromCString (str(result)) + return result + + def __imod__(self, other): + result = self.__mod__(other) + self.sbvalue.SetValueFromCString (str(result)) + return result + + def __ipow__(self, other, modulo=None): + result = pow(int(self), int(other), modulo) + self.sbvalue.SetValueFromCString (str(result)) + return result + + def __ilshift__(self, other): + result = self.__lshift__(other) + self.sbvalue.SetValueFromCString (str(result)) + return result + + def __irshift__(self, other): + result = self.__rshift__(other) + self.sbvalue.SetValueFromCString (str(result)) + return result + + def __iand__(self, other): + result = self.__and__(other) + self.sbvalue.SetValueFromCString (str(result)) + return result + + def __ixor__(self, other): + result = self.__xor__(other) + self.sbvalue.SetValueFromCString (str(result)) + return result + + def __ior__(self, other): + result = self.__or__(other) + self.sbvalue.SetValueFromCString (str(result)) + return result + + def __neg__(self): + return -int(self) + + def __pos__(self): + return +int(self) + + def __abs__(self): + return abs(int(self)) + + def __invert__(self): + return
~int(self) + + def __complex__(self): + return complex (int(self)) + + def __int__(self): + is_num,is_sign = is_numeric_type(self.sbvalue.GetType().GetCanonicalType().GetBasicType()) + if is_num and not is_sign: return self.sbvalue.GetValueAsUnsigned() + return self.sbvalue.GetValueAsSigned() + + def __long__(self): + return self.__int__() + + def __float__(self): + return float (self.sbvalue.GetValueAsSigned()) + + def __oct__(self): + return '0%o' % self.sbvalue.GetValueAsUnsigned() + + def __hex__(self): + return '0x%x' % self.sbvalue.GetValueAsUnsigned() + + def __len__(self): + return self.sbvalue.GetNumChildren() + + def __eq__(self, other): + if type(other) is int: + return int(self) == other + elif type(other) is str: + return str(self) == other + elif type(other) is value: + self_err = SBError() + other_err = SBError() + self_val = self.sbvalue.GetValueAsUnsigned(self_err) + if self_err.fail: + raise ValueError("unable to extract value of self") + other_val = other.sbvalue.GetValueAsUnsigned(other_err) + if other_err.fail: + raise ValueError("unable to extract value of other") + return self_val == other_val + raise TypeError("Unknown type %s, No equality operation defined." % str(type(other))) + + def __ne__(self, other): + return not self.__eq__(other) +%}
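The wrapper above is what lets scripts treat debuggee variables like ordinary Python values. A short usage sketch, assuming an interactive session where lldb.frame is valid and the frame has variables named "count" and "items" (both names invented for illustration):

    import lldb

    count = lldb.value(lldb.frame.FindVariable("count"))
    if count == 0:              # __eq__ accepts int, str, or another value
        print("count is zero")
    print(int(count) + 1)       # arithmetic funnels through __int__

    items = lldb.value(lldb.frame.FindVariable("items"))
    for item in items:          # iteration goes through value_iter
        print(item)             # __str__ delegates to the underlying SBValue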
+ +%pythoncode %{ + +class SBSyntheticValueProvider(object): + def __init__(self,valobj): + pass + + def num_children(self): + return 0 + + def get_child_index(self,name): + return None + + def get_child_at_index(self,idx): + return None + + def update(self): + pass + + def has_children(self): + return False + + +%} + +%pythoncode %{ + +# given an lldb.SBBasicType it returns a tuple +# (is_numeric, is_signed) +# the value of is_signed is undefined if is_numeric == false +def is_numeric_type(basic_type): + if basic_type == eBasicTypeInvalid: return (False,False) + if basic_type == eBasicTypeVoid: return (False,False) + if basic_type == eBasicTypeChar: return (True,False) + if basic_type == eBasicTypeSignedChar: return (True,True) + if basic_type == eBasicTypeUnsignedChar: return (True,False) + if basic_type == eBasicTypeWChar: return (True,False) + if basic_type == eBasicTypeSignedWChar: return (True,True) + if basic_type == eBasicTypeUnsignedWChar: return (True,False) + if basic_type == eBasicTypeChar16: return (True,False) + if basic_type == eBasicTypeChar32: return (True,False) + if basic_type == eBasicTypeShort: return (True,True) + if basic_type == eBasicTypeUnsignedShort: return (True,False) + if basic_type == eBasicTypeInt: return (True,True) + if basic_type == eBasicTypeUnsignedInt: return (True,False) + if basic_type == eBasicTypeLong: return (True,True) + if basic_type == eBasicTypeUnsignedLong: return (True,False) + if basic_type == eBasicTypeLongLong: return (True,True) + if basic_type == eBasicTypeUnsignedLongLong: return (True,False) + if basic_type == eBasicTypeInt128: return (True,True) + if basic_type == eBasicTypeUnsignedInt128: return (True,False) + if basic_type == eBasicTypeBool: return (False,False) + if basic_type == eBasicTypeHalf: return (True,True) + if basic_type == eBasicTypeFloat: return (True,True) + if basic_type == eBasicTypeDouble: return (True,True) + if basic_type == eBasicTypeLongDouble: return (True,True) + if basic_type == eBasicTypeFloatComplex: return (True,True) + if basic_type == eBasicTypeDoubleComplex: return (True,True) + if basic_type == eBasicTypeLongDoubleComplex: return (True,True) + if basic_type == eBasicTypeObjCID: return (False,False) + if basic_type == eBasicTypeObjCClass: return (False,False) + if basic_type == eBasicTypeObjCSel: return (False,False) + if basic_type == eBasicTypeNullPtr: return (False,False) + #if basic_type == eBasicTypeOther: + return (False,False) + +%}
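is_numeric_type() is the helper value.__int__ consults to decide between signed and unsigned extraction. A hedged sketch, assuming a live session and a frame variable named "v" (invented for illustration):

    import lldb

    v = lldb.frame.FindVariable("v")
    basic = v.GetType().GetCanonicalType().GetBasicType()
    is_num, is_signed = lldb.is_numeric_type(basic)
    if is_num:
        print(v.GetValueAsSigned() if is_signed else v.GetValueAsUnsigned())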
diff --git a/lldb/scripts/Python/python-swigsafecast.swig b/lldb/bindings/python/python-swigsafecast.swig similarity index 100% rename from lldb/scripts/Python/python-swigsafecast.swig rename to lldb/bindings/python/python-swigsafecast.swig diff --git a/lldb/scripts/Python/python-typemaps.swig b/lldb/bindings/python/python-typemaps.swig similarity index 100% rename from lldb/scripts/Python/python-typemaps.swig rename to lldb/bindings/python/python-typemaps.swig diff --git a/lldb/scripts/Python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig similarity index 100% rename from lldb/scripts/Python/python-wrapper.swig rename to lldb/bindings/python/python-wrapper.swig diff --git a/lldb/docs/CMakeLists.txt b/lldb/docs/CMakeLists.txt index 0082d004bd0d6..8fa46860e5cec 100644 --- a/lldb/docs/CMakeLists.txt +++ b/lldb/docs/CMakeLists.txt @@ -30,9 +30,9 @@ if (LLDB_ENABLE_PYTHON) # Because we don't build liblldb, epydoc will complain that the import of # _lldb.so failed, but that doesn't prevent it from generating the docs. file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lldb) - get_target_property(lldb_scripts_dir swig_wrapper BINARY_DIR) + get_target_property(lldb_bindings_dir swig_wrapper BINARY_DIR) add_custom_target(lldb-python-doc-package - COMMAND "${CMAKE_COMMAND}" -E copy "${lldb_scripts_dir}/lldb.py" "${CMAKE_CURRENT_BINARY_DIR}/lldb/__init__.py" + COMMAND "${CMAKE_COMMAND}" -E copy "${lldb_bindings_dir}/lldb.py" "${CMAKE_CURRENT_BINARY_DIR}/lldb/__init__.py" COMMENT "Copying lldb.py to pretend package.") add_dependencies(lldb-python-doc-package swig_wrapper) diff --git a/lldb/include/lldb/Expression/DWARFExpression.h b/lldb/include/lldb/Expression/DWARFExpression.h index 1e32957443fd3..bfae142d5e016 100644 --- a/lldb/include/lldb/Expression/DWARFExpression.h +++ b/lldb/include/lldb/Expression/DWARFExpression.h @@ -34,15 +34,6 @@ namespace lldb_private { /// location expression or a location list and interprets it. class DWARFExpression { public: - enum LocationListFormat : uint8_t { - NonLocationList, // Not a location list - RegularLocationList, // Location list format used in non-split dwarf files - SplitDwarfLocationList, // Location list format used in pre-DWARF v5 split - // dwarf files (.debug_loc.dwo) - LocLists, // Location list format used in DWARF v5 - // (.debug_loclists/.debug_loclists.dwo). - }; - DWARFExpression(); /// Constructor diff --git a/lldb/include/lldb/Interpreter/OptionValue.h b/lldb/include/lldb/Interpreter/OptionValue.h index 734c92b4bcada..44c7f621a5824 100644 --- a/lldb/include/lldb/Interpreter/OptionValue.h +++ b/lldb/include/lldb/Interpreter/OptionValue.h @@ -58,8 +58,7 @@ class OptionValue { eDumpGroupExport = (eDumpOptionCommand | eDumpOptionName | eDumpOptionValue) }; - OptionValue() - : m_callback(nullptr), m_baton(nullptr), m_value_was_set(false) {} + OptionValue() : m_value_was_set(false) {} virtual ~OptionValue() = default; @@ -304,22 +303,19 @@ class OptionValue { m_parent_wp = parent_sp; } - void SetValueChangedCallback(OptionValueChangedCallback callback, - void *baton) { - assert(m_callback == nullptr); - m_callback = callback; - m_baton = baton; + void SetValueChangedCallback(std::function<void()> callback) { + assert(!m_callback); + m_callback = std::move(callback); } void NotifyValueChanged() { if (m_callback) - m_callback(m_baton, this); + m_callback(); } protected: lldb::OptionValueWP m_parent_wp; - OptionValueChangedCallback m_callback; - void *m_baton; + std::function<void()> m_callback; bool m_value_was_set; // This can be used to see if a value has been set // by a call to SetValueFromCString(). It is often // handy to know if an option value was set from the diff --git a/lldb/include/lldb/Interpreter/OptionValueProperties.h b/lldb/include/lldb/Interpreter/OptionValueProperties.h index bea2b3c91e009..980f01183ef56 100644 --- a/lldb/include/lldb/Interpreter/OptionValueProperties.h +++ b/lldb/include/lldb/Interpreter/OptionValueProperties.h @@ -198,8 +198,7 @@ ConstString name); void SetValueChangedCallback(uint32_t property_idx, - OptionValueChangedCallback callback, - void *baton); + std::function<void()> callback); protected: Property *ProtectedGetPropertyAtIndex(uint32_t idx) { diff --git a/lldb/include/lldb/Interpreter/Property.h b/lldb/include/lldb/Interpreter/Property.h index 797aee4be8159..76264832705ba 100644 --- a/lldb/include/lldb/Interpreter/Property.h +++ b/lldb/include/lldb/Interpreter/Property.h @@ -64,8 +64,7 @@ class Property { uint32_t output_width, bool display_qualified_name) const; - void SetValueChangedCallback(OptionValueChangedCallback callback, - void *baton); + void SetValueChangedCallback(std::function<void()> callback); protected: ConstString m_name; diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index 47c5c78704052..2ba996d4995f5 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -85,9 +85,6 @@ class ProcessProperties : public Properties { std::chrono::seconds GetUtilityExpressionTimeout() const; protected: - static void OptionValueChangedCallback(void *baton, - OptionValue *option_value); - Process *m_process; // Can be nullptr for global ProcessProperties }; diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h index 6f8d60731acf5..1e9153c401ef1 100644 --- a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -209,26 +209,15 @@ class TargetProperties : public Properties { private: // Callbacks for m_launch_info.
- static void Arg0ValueChangedCallback(void *target_property_ptr, - OptionValue *); - static void RunArgsValueChangedCallback(void *target_property_ptr, - OptionValue *); - static void EnvVarsValueChangedCallback(void *target_property_ptr, - OptionValue *); - static void InheritEnvValueChangedCallback(void *target_property_ptr, - OptionValue *); - static void InputPathValueChangedCallback(void *target_property_ptr, - OptionValue *); - static void OutputPathValueChangedCallback(void *target_property_ptr, - OptionValue *); - static void ErrorPathValueChangedCallback(void *target_property_ptr, - OptionValue *); - static void DetachOnErrorValueChangedCallback(void *target_property_ptr, - OptionValue *); - static void DisableASLRValueChangedCallback(void *target_property_ptr, - OptionValue *); - static void DisableSTDIOValueChangedCallback(void *target_property_ptr, - OptionValue *); + void Arg0ValueChangedCallback(); + void RunArgsValueChangedCallback(); + void EnvVarsValueChangedCallback(); + void InputPathValueChangedCallback(); + void OutputPathValueChangedCallback(); + void ErrorPathValueChangedCallback(); + void DetachOnErrorValueChangedCallback(); + void DisableASLRValueChangedCallback(); + void DisableSTDIOValueChangedCallback(); // Member variables. ProcessLaunchInfo m_launch_info; diff --git a/lldb/include/lldb/lldb-private-interfaces.h b/lldb/include/lldb/lldb-private-interfaces.h index 04b78bcc19f8e..27a2c4c3f27ff 100644 --- a/lldb/include/lldb/lldb-private-interfaces.h +++ b/lldb/include/lldb/lldb-private-interfaces.h @@ -82,8 +82,6 @@ typedef bool (*BreakpointHitCallback)(void *baton, typedef bool (*WatchpointHitCallback)(void *baton, StoppointCallbackContext *context, lldb::user_id_t watch_id); -typedef void (*OptionValueChangedCallback)(void *baton, - OptionValue *option_value); typedef bool (*ThreadPlanShouldStopHereCallback)( ThreadPlan *current_plan, Flags &flags, lldb::FrameComparison operation, Status &status, void *baton); diff --git a/lldb/packages/Python/lldbsuite/test/commands/target/create-no-such-arch/TestNoSuchArch.py b/lldb/packages/Python/lldbsuite/test/commands/target/create-no-such-arch/TestNoSuchArch.py index a780ca2756647..4d7f0838f8776 100644 --- a/lldb/packages/Python/lldbsuite/test/commands/target/create-no-such-arch/TestNoSuchArch.py +++ b/lldb/packages/Python/lldbsuite/test/commands/target/create-no-such-arch/TestNoSuchArch.py @@ -19,8 +19,8 @@ def test(self): # Check that passing an invalid arch via the command-line fails but # doesn't crash self.expect( - "target crete --arch nothingtoseehere %s" % - (exe), error=True) + "target create --arch nothingtoseehere %s" % + (exe), error=True, substrs=["error: invalid triple 'nothingtoseehere'"]) # Check that passing an invalid arch via the SB API fails but doesn't # crash diff --git a/lldb/scripts/Python/python-extensions.swig b/lldb/scripts/Python/python-extensions.swig deleted file mode 100644 index dbd4b1d79d005..0000000000000 --- a/lldb/scripts/Python/python-extensions.swig +++ /dev/null @@ -1,1109 +0,0 @@ -%extend lldb::SBAddress { - %nothreadallow; - PyObject *lldb::SBAddress::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} -%extend lldb::SBBlock { - %nothreadallow; - PyObject *lldb::SBBlock::__str__ 
(){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} -%extend lldb::SBBreakpoint { - %nothreadallow; - PyObject *lldb::SBBreakpoint::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; - - %pythoncode %{ - def __eq__(self, rhs): - if not isinstance(rhs, type(self)): - return False - - return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) - - def __ne__(self, rhs): - if not isinstance(rhs, type(self)): - return True - - return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) - %} - -} -%extend lldb::SBBreakpointLocation { - %nothreadallow; - PyObject *lldb::SBBreakpointLocation::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description, lldb::eDescriptionLevelFull); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} - -%extend lldb::SBBreakpointName { - %nothreadallow; - PyObject *lldb::SBBreakpointName::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} - -%extend lldb::SBBroadcaster { - %pythoncode %{ - def __eq__(self, rhs): - if not isinstance(rhs, type(self)): - return False - - return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) - - def __ne__(self, rhs): - if not isinstance(rhs, type(self)): - return True - - return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) - %} -} - -%extend lldb::SBCommandReturnObject { - %nothreadallow; - PyObject *lldb::SBCommandReturnObject::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; - - /* the write() and flush() calls are not part of the SB API proper, and are solely for Python usage - they are meant to make an SBCommandReturnObject into a file-like object so that instructions of the sort - print >>sb_command_return_object, "something" - will work correctly */ - - void lldb::SBCommandReturnObject::write (const char* str) - { - if (str) - $self->Printf("%s",str); - } - void lldb::SBCommandReturnObject::flush () - {} -} -%extend lldb::SBCompileUnit { - %nothreadallow; - PyObject *lldb::SBCompileUnit::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if 
(desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; - %pythoncode %{ - def __eq__(self, rhs): - if not isinstance(rhs, type(self)): - return False - - return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) - - def __ne__(self, rhs): - if not isinstance(rhs, type(self)): - return True - - return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) - %} -} -%extend lldb::SBData { - %nothreadallow; - PyObject *lldb::SBData::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} -%extend lldb::SBDebugger { - %nothreadallow; - PyObject *lldb::SBDebugger::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} -%extend lldb::SBDeclaration { - %nothreadallow; - PyObject *lldb::SBDeclaration::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; - - %pythoncode %{ - def __eq__(self, rhs): - if not isinstance(rhs, type(self)): - return False - - return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) - - def __ne__(self, rhs): - if not isinstance(rhs, type(self)): - return True - - return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) - %} - -} -%extend lldb::SBError { - %nothreadallow; - PyObject *lldb::SBError::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} -%extend lldb::SBFileSpec { - %nothreadallow; - PyObject *lldb::SBFileSpec::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} -%extend lldb::SBFrame { - %nothreadallow; - PyObject *lldb::SBFrame::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} -%extend lldb::SBFunction { - %nothreadallow; - PyObject *lldb::SBFunction::__str__ (){ - lldb::SBStream description; - $self->GetDescription 
(description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; - - %pythoncode %{ - def __eq__(self, rhs): - if not isinstance(rhs, type(self)): - return False - - return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) - - def __ne__(self, rhs): - if not isinstance(rhs, type(self)): - return True - - return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) - %} - -} -%extend lldb::SBInstruction { - %nothreadallow; - PyObject *lldb::SBInstruction::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} -%extend lldb::SBInstructionList { - %nothreadallow; - PyObject *lldb::SBInstructionList::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} -%extend lldb::SBLineEntry { - %nothreadallow; - PyObject *lldb::SBLineEntry::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; - - %pythoncode %{ - def __eq__(self, rhs): - if not isinstance(rhs, type(self)): - return False - - return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) - - def __ne__(self, rhs): - if not isinstance(rhs, type(self)): - return True - - return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) - %} -} - -%extend lldb::SBMemoryRegionInfo { - %nothreadallow; - PyObject *lldb::SBMemoryRegionInfo::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} - -%extend lldb::SBModule { - %nothreadallow; - PyObject *lldb::SBModule::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; - - %pythoncode %{ - def __eq__(self, rhs): - if not isinstance(rhs, type(self)): - return False - - return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) - - def __ne__(self, rhs): - if not isinstance(rhs, type(self)): - return True - - return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) - %} -} - -%extend lldb::SBModuleSpec { - %nothreadallow; - PyObject *lldb::SBModuleSpec::__str__ (){ - 
lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} - -%extend lldb::SBModuleSpecList { - %nothreadallow; - PyObject *lldb::SBModuleSpecList::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} - -%extend lldb::SBProcess { - %nothreadallow; - PyObject *lldb::SBProcess::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} -%extend lldb::SBSection { - %nothreadallow; - PyObject *lldb::SBSection::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; - - %pythoncode %{ - def __eq__(self, rhs): - if not isinstance(rhs, type(self)): - return False - - return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) - - def __ne__(self, rhs): - if not isinstance(rhs, type(self)): - return True - - return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) - %} -} -%extend lldb::SBStream { - /* the write() and flush() calls are not part of the SB API proper, and are solely for Python usage - they are meant to make an SBStream into a file-like object so that instructions of the sort - print >>sb_stream, "something" - will work correctly */ - - void lldb::SBStream::write (const char* str) - { - if (str) - $self->Printf("%s",str); - } - void lldb::SBStream::flush () - {} -} -%extend lldb::SBSymbol { - %nothreadallow; - PyObject *lldb::SBSymbol::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; - %pythoncode %{ - def __eq__(self, rhs): - if not isinstance(rhs, type(self)): - return False - - return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) - - def __ne__(self, rhs): - if not isinstance(rhs, type(self)): - return True - - return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) - %} -} -%extend lldb::SBSymbolContext { - %nothreadallow; - PyObject *lldb::SBSymbolContext::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - 
%clearnothreadallow; -} -%extend lldb::SBSymbolContextList { - %nothreadallow; - PyObject *lldb::SBSymbolContextList::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} - -%extend lldb::SBTarget { - %pythoncode %{ - def __eq__(self, rhs): - if not isinstance(rhs, type(self)): - return False - - return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) - - def __ne__(self, rhs): - if not isinstance(rhs, type(self)): - return True - - return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) - %} -} - -%extend lldb::SBType { - %nothreadallow; - PyObject *lldb::SBType::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description, lldb::eDescriptionLevelBrief); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} -%extend lldb::SBTypeCategory { - %nothreadallow; - PyObject *lldb::SBTypeCategory::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description, lldb::eDescriptionLevelBrief); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} -%extend lldb::SBTypeFilter { - %nothreadallow; - PyObject *lldb::SBTypeFilter::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description, lldb::eDescriptionLevelBrief); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; - %pythoncode %{ - def __eq__(self, rhs): - if not isinstance(rhs, type(self)): - return False - - return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) - - def __ne__(self, rhs): - if not isinstance(rhs, type(self)): - return True - - return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) - %} -} -%extend lldb::SBTypeFormat { - %nothreadallow; - PyObject *lldb::SBTypeFormat::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description, lldb::eDescriptionLevelBrief); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} -%extend lldb::SBTypeMember { - %nothreadallow; - PyObject *lldb::SBTypeMember::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description, lldb::eDescriptionLevelBrief); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} -%extend lldb::SBTypeMemberFunction { - %nothreadallow; - PyObject 
*lldb::SBTypeMemberFunction::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description, lldb::eDescriptionLevelBrief); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} -%extend lldb::SBTypeEnumMember { - %nothreadallow; - PyObject *lldb::SBTypeEnumMember::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description, lldb::eDescriptionLevelBrief); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} -%extend lldb::SBTypeNameSpecifier { - %nothreadallow; - PyObject *lldb::SBTypeNameSpecifier::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description, lldb::eDescriptionLevelBrief); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; - %pythoncode %{ - def __eq__(self, rhs): - if not isinstance(rhs, type(self)): - return False - - return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) - - def __ne__(self, rhs): - if not isinstance(rhs, type(self)): - return True - - return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) - %} -} -%extend lldb::SBTypeSummary { - %nothreadallow; - PyObject *lldb::SBTypeSummary::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description, lldb::eDescriptionLevelBrief); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; - %pythoncode %{ - def __eq__(self, rhs): - if not isinstance(rhs, type(self)): - return False - - return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) - - def __ne__(self, rhs): - if not isinstance(rhs, type(self)): - return True - - return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) - %} -} -%extend lldb::SBTypeSynthetic { - %nothreadallow; - PyObject *lldb::SBTypeSynthetic::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description, lldb::eDescriptionLevelBrief); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; - %pythoncode %{ - def __eq__(self, rhs): - if not isinstance(rhs, type(self)): - return False - - return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) - - def __ne__(self, rhs): - if not isinstance(rhs, type(self)): - return True - - return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) - %} -} -%extend lldb::SBThread { - %nothreadallow; - PyObject *lldb::SBThread::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == 
'\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; - %pythoncode %{ - def __eq__(self, rhs): - if not isinstance(rhs, type(self)): - return False - - return getattr(_lldb,self.__class__.__name__+"___eq__")(self, rhs) - - def __ne__(self, rhs): - if not isinstance(rhs, type(self)): - return True - - return getattr(_lldb,self.__class__.__name__+"___ne__")(self, rhs) - %} -} -%extend lldb::SBValue { - %nothreadallow; - PyObject *lldb::SBValue::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} -%extend lldb::SBValueList { - %nothreadallow; - PyObject *lldb::SBValueList::__str__ (){ - lldb::SBStream description; - const size_t n = $self->GetSize(); - if (n) - { - for (size_t i=0; i<n; i++) - $self->GetValueAtIndex(i).GetDescription(description); - } - else - { - description.Printf("<empty> lldb.SBValueList()"); - } - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} -%extend lldb::SBWatchpoint { - %nothreadallow; - PyObject *lldb::SBWatchpoint::__str__ (){ - lldb::SBStream description; - $self->GetDescription (description, lldb::eDescriptionLevelVerbose); - const char *desc = description.GetData(); - size_t desc_len = description.GetSize(); - if (desc_len > 0 && (desc[desc_len-1] == '\n' || desc[desc_len-1] == '\r')) - --desc_len; - return PythonString(llvm::StringRef(desc, desc_len)).release(); - } - %clearnothreadallow; -} - - -// %extend lldb::SBDebugger { -// // FIXME: We can't get the callback and baton -// PyObject *lldb::SBDebugger (){ -// // Only call Py_XDECREF if we have a Python object (or NULL) -// if (LLDBSwigPythonCallPythonLogOutputCallback == $self->GetLogOutPutCallback()) -// Py_XDECREF($self->GetCallbackBaton()); -// } -// } - -%pythoncode %{ - -def command(command_name=None, doc=None): - import lldb - """A decorator function that registers an LLDB command line - command that is bound to the function it is attached to.""" - def callable(function): - """Registers an lldb command for the decorated function.""" - command = "command script add -f %s.%s %s" % (function.__module__, function.__name__, command_name or function.__name__) - lldb.debugger.HandleCommand(command) - if doc: - function.__doc__ = doc - return function - - return callable - -class declaration(object): - '''A class that represents a source declaration location with file, line and column.''' - def __init__(self, file, line, col): - self.file = file - self.line = line - self.col = col - -class value_iter(object): - def __iter__(self): - return self - - def __next__(self): - if self.index >= self.length: - raise StopIteration() - child_sbvalue = self.sbvalue.GetChildAtIndex(self.index) - self.index += 1 - return value(child_sbvalue) - - def next(self): - return self.__next__() - - def __init__(self,value): - self.index = 0 - self.sbvalue = value - if type(self.sbvalue) is value: - self.sbvalue = self.sbvalue.sbvalue - self.length = self.sbvalue.GetNumChildren() - -class value(object): - '''A class designed to wrap lldb.SBValue()
objects so the resulting object - can be used as a variable would be in code. So if you have a Point structure - variable in your code in the current frame named "pt", you can initialize an instance - of this class with it: - - pt = lldb.value(lldb.frame.FindVariable("pt")) - print pt - print pt.x - print pt.y - - pt = lldb.value(lldb.frame.FindVariable("rectangle_array")) - print rectangle_array[12] - print rectangle_array[5].origin.x''' - def __init__(self, sbvalue): - self.sbvalue = sbvalue - - def __nonzero__(self): - return self.sbvalue.__nonzero__() - - def __bool__(self): - return self.sbvalue.__bool__() - - def __str__(self): - return self.sbvalue.__str__() - - def __getitem__(self, key): - # Allow array access if this value has children... - if type(key) is value: - key = int(key) - if type(key) is int: - child_sbvalue = (self.sbvalue.GetValueForExpressionPath("[%i]" % key)) - if child_sbvalue and child_sbvalue.IsValid(): - return value(child_sbvalue) - raise IndexError("Index '%d' is out of range" % key) - raise TypeError("No array item of type %s" % str(type(key))) - - def __iter__(self): - return value_iter(self.sbvalue) - - def __getattr__(self, name): - child_sbvalue = self.sbvalue.GetChildMemberWithName (name) - if child_sbvalue and child_sbvalue.IsValid(): - return value(child_sbvalue) - raise AttributeError("Attribute '%s' is not defined" % name) - - def __add__(self, other): - return int(self) + int(other) - - def __sub__(self, other): - return int(self) - int(other) - - def __mul__(self, other): - return int(self) * int(other) - - def __floordiv__(self, other): - return int(self) // int(other) - - def __mod__(self, other): - return int(self) % int(other) - - def __divmod__(self, other): - return int(self) % int(other) - - def __pow__(self, other): - return int(self) ** int(other) - - def __lshift__(self, other): - return int(self) << int(other) - - def __rshift__(self, other): - return int(self) >> int(other) - - def __and__(self, other): - return int(self) & int(other) - - def __xor__(self, other): - return int(self) ^ int(other) - - def __or__(self, other): - return int(self) | int(other) - - def __div__(self, other): - return int(self) / int(other) - - def __truediv__(self, other): - return int(self) / int(other) - - def __iadd__(self, other): - result = self.__add__(other) - self.sbvalue.SetValueFromCString (str(result)) - return result - - def __isub__(self, other): - result = self.__sub__(other) - self.sbvalue.SetValueFromCString (str(result)) - return result - - def __imul__(self, other): - result = self.__mul__(other) - self.sbvalue.SetValueFromCString (str(result)) - return result - - def __idiv__(self, other): - result = self.__div__(other) - self.sbvalue.SetValueFromCString (str(result)) - return result - - def __itruediv__(self, other): - result = self.__truediv__(other) - self.sbvalue.SetValueFromCString (str(result)) - return result - - def __ifloordiv__(self, other): - result = self.__floordiv__(self, other) - self.sbvalue.SetValueFromCString (str(result)) - return result - - def __imod__(self, other): - result = self.__and__(self, other) - self.sbvalue.SetValueFromCString (str(result)) - return result - - def __ipow__(self, other): - result = self.__pow__(self, other) - self.sbvalue.SetValueFromCString (str(result)) - return result - - def __ipow__(self, other, modulo): - result = self.__pow__(self, other, modulo) - self.sbvalue.SetValueFromCString (str(result)) - return result - - def __ilshift__(self, other): - result = self.__lshift__(other) - 
self.sbvalue.SetValueFromCString (str(result)) - return result - - def __irshift__(self, other): - result = self.__rshift__(other) - self.sbvalue.SetValueFromCString (str(result)) - return result - - def __iand__(self, other): - result = self.__and__(self, other) - self.sbvalue.SetValueFromCString (str(result)) - return result - - def __ixor__(self, other): - result = self.__xor__(self, other) - self.sbvalue.SetValueFromCString (str(result)) - return result - - def __ior__(self, other): - result = self.__ior__(self, other) - self.sbvalue.SetValueFromCString (str(result)) - return result - - def __neg__(self): - return -int(self) - - def __pos__(self): - return +int(self) - - def __abs__(self): - return abs(int(self)) - - def __invert__(self): - return ~int(self) - - def __complex__(self): - return complex (int(self)) - - def __int__(self): - is_num,is_sign = is_numeric_type(self.sbvalue.GetType().GetCanonicalType().GetBasicType()) - if is_num and not is_sign: return self.sbvalue.GetValueAsUnsigned() - return self.sbvalue.GetValueAsSigned() - - def __long__(self): - return self.__int__() - - def __float__(self): - return float (self.sbvalue.GetValueAsSigned()) - - def __oct__(self): - return '0%o' % self.sbvalue.GetValueAsUnsigned() - - def __hex__(self): - return '0x%x' % self.sbvalue.GetValueAsUnsigned() - - def __len__(self): - return self.sbvalue.GetNumChildren() - - def __eq__(self, other): - if type(other) is int: - return int(self) == other - elif type(other) is str: - return str(self) == other - elif type(other) is value: - self_err = SBError() - other_err = SBError() - self_val = self.sbvalue.GetValueAsUnsigned(self_err) - if self_err.fail: - raise ValueError("unable to extract value of self") - other_val = other.sbvalue.GetValueAsUnsigned(other_err) - if other_err.fail: - raise ValueError("unable to extract value of other") - return self_val == other_val - raise TypeError("Unknown type %s, No equality operation defined." 
% str(type(other))) - - def __ne__(self, other): - return not self.__eq__(other) -%} - -%pythoncode %{ - -class SBSyntheticValueProvider(object): - def __init__(self,valobj): - pass - - def num_children(self): - return 0 - - def get_child_index(self,name): - return None - - def get_child_at_index(self,idx): - return None - - def update(self): - pass - - def has_children(self): - return False - - -%} - -%pythoncode %{ - -# given an lldb.SBBasicType it returns a tuple -# (is_numeric, is_signed) -# the value of is_signed is undefined if is_numeric == false -def is_numeric_type(basic_type): - if basic_type == eBasicTypeInvalid: return (False,False) - if basic_type == eBasicTypeVoid: return (False,False) - if basic_type == eBasicTypeChar: return (True,False) - if basic_type == eBasicTypeSignedChar: return (True,True) - if basic_type == eBasicTypeUnsignedChar: return (True,False) - if basic_type == eBasicTypeWChar: return (True,False) - if basic_type == eBasicTypeSignedWChar: return (True,True) - if basic_type == eBasicTypeUnsignedWChar: return (True,False) - if basic_type == eBasicTypeChar16: return (True,False) - if basic_type == eBasicTypeChar32: return (True,False) - if basic_type == eBasicTypeShort: return (True,True) - if basic_type == eBasicTypeUnsignedShort: return (True,False) - if basic_type == eBasicTypeInt: return (True,True) - if basic_type == eBasicTypeUnsignedInt: return (True,False) - if basic_type == eBasicTypeLong: return (True,True) - if basic_type == eBasicTypeUnsignedLong: return (True,False) - if basic_type == eBasicTypeLongLong: return (True,True) - if basic_type == eBasicTypeUnsignedLongLong: return (True,False) - if basic_type == eBasicTypeInt128: return (True,True) - if basic_type == eBasicTypeUnsignedInt128: return (True,False) - if basic_type == eBasicTypeBool: return (False,False) - if basic_type == eBasicTypeHalf: return (True,True) - if basic_type == eBasicTypeFloat: return (True,True) - if basic_type == eBasicTypeDouble: return (True,True) - if basic_type == eBasicTypeLongDouble: return (True,True) - if basic_type == eBasicTypeFloatComplex: return (True,True) - if basic_type == eBasicTypeDoubleComplex: return (True,True) - if basic_type == eBasicTypeLongDoubleComplex: return (True,True) - if basic_type == eBasicTypeObjCID: return (False,False) - if basic_type == eBasicTypeObjCClass: return (False,False) - if basic_type == eBasicTypeObjCSel: return (False,False) - if basic_type == eBasicTypeNullPtr: return (False,False) - #if basic_type == eBasicTypeOther: - return (False,False) - -%} diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt index eea409bed185a..e0ecf29b502b7 100644 --- a/lldb/source/API/CMakeLists.txt +++ b/lldb/source/API/CMakeLists.txt @@ -5,13 +5,13 @@ endif() get_property(LLDB_ALL_PLUGINS GLOBAL PROPERTY LLDB_PLUGINS) if(LLDB_ENABLE_PYTHON) - get_target_property(lldb_scripts_dir swig_wrapper BINARY_DIR) - set(lldb_python_wrapper ${lldb_scripts_dir}/LLDBWrapPython.cpp) + get_target_property(lldb_bindings_dir swig_wrapper BINARY_DIR) + set(lldb_python_wrapper ${lldb_bindings_dir}/LLDBWrapPython.cpp) endif() if(LLDB_ENABLE_LUA) - get_target_property(lldb_scripts_dir swig_wrapper_lua BINARY_DIR) - set(lldb_lua_wrapper ${lldb_scripts_dir}/LLDBWrapLua.cpp) + get_target_property(lldb_bindings_dir swig_wrapper_lua BINARY_DIR) + set(lldb_lua_wrapper ${lldb_bindings_dir}/LLDBWrapLua.cpp) endif() if(LLDB_BUILD_FRAMEWORK) diff --git a/lldb/source/Core/Section.cpp b/lldb/source/Core/Section.cpp index 4a9acab2e27c1..b1d7eee108b7d 100644 
--- a/lldb/source/Core/Section.cpp +++ b/lldb/source/Core/Section.cpp @@ -280,29 +280,6 @@ bool Section::ContainsFileAddress(addr_t vm_addr) const { return false; } -int Section::Compare(const Section &a, const Section &b) { - if (&a == &b) - return 0; - - const ModuleSP a_module_sp = a.GetModule(); - const ModuleSP b_module_sp = b.GetModule(); - if (a_module_sp == b_module_sp) { - user_id_t a_sect_uid = a.GetID(); - user_id_t b_sect_uid = b.GetID(); - if (a_sect_uid < b_sect_uid) - return -1; - if (a_sect_uid > b_sect_uid) - return 1; - return 0; - } else { - // The modules are different, just compare the module pointers - if (a_module_sp.get() < b_module_sp.get()) - return -1; - else - return 1; // We already know the modules aren't equal - } -} - void Section::Dump(Stream *s, Target *target, uint32_t depth) const { // s->Printf("%.*p: ", (int)sizeof(void*) * 2, this); s->Indent(); diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index c67e35b145189..69c84640ef93a 100644 --- a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -54,29 +54,6 @@ ReadAddressFromDebugAddrSection(const DWARFUnit *dwarf_cu, return LLDB_INVALID_ADDRESS; } -/// Return the location list parser for the given format. -static std::unique_ptr<llvm::DWARFLocationTable> -GetLocationTable(DWARFExpression::LocationListFormat format, const DataExtractor &data) { - llvm::DWARFDataExtractor llvm_data( - toStringRef(data.GetData()), - data.GetByteOrder() == lldb::eByteOrderLittle, data.GetAddressByteSize()); - - switch (format) { - case DWARFExpression::NonLocationList: - return nullptr; - // DWARF<=4 .debug_loc - case DWARFExpression::RegularLocationList: - return std::make_unique<llvm::DWARFDebugLoc>(llvm_data); - // Non-standard DWARF 4 extension (fission) .debug_loc.dwo - case DWARFExpression::SplitDwarfLocationList: - // DWARF 5 .debug_loclists(.dwo) - case DWARFExpression::LocLists: - return std::make_unique<llvm::DWARFDebugLoclists>( - llvm_data, format == DWARFExpression::LocLists ? 5 : 4); - } - llvm_unreachable("Invalid LocationListFormat!"); -} - // DWARFExpression constructor DWARFExpression::DWARFExpression() : m_module_wp(), m_data(), m_dwarf_cu(nullptr), @@ -157,10 +134,8 @@ void DWARFExpression::GetDescription(Stream *s, lldb::DescriptionLevel level, if (IsLocationList()) { // We have a location list lldb::offset_t offset = 0; - std::unique_ptr<llvm::DWARFLocationTable> loctable_up = GetLocationTable( - m_dwarf_cu->GetSymbolFileDWARF().GetLocationListFormat(), m_data); - if (!loctable_up) - return; + std::unique_ptr<llvm::DWARFLocationTable> loctable_up = + m_dwarf_cu->GetLocationTable(m_data); llvm::MCRegisterInfo *MRI = abi ?
&abi->GetMCRegisterInfo() : nullptr; @@ -2812,10 +2787,8 @@ DWARFExpression::GetLocationExpression(addr_t load_function_start, addr_t addr) const { Log *log = GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS); - std::unique_ptr<llvm::DWARFLocationTable> loctable_up = GetLocationTable( - m_dwarf_cu->GetSymbolFileDWARF().GetLocationListFormat(), m_data); - if (!loctable_up) - return llvm::None; + std::unique_ptr<llvm::DWARFLocationTable> loctable_up = + m_dwarf_cu->GetLocationTable(m_data); llvm::Optional<llvm::DWARFLocationExpression> result; uint64_t offset = 0; auto lookup_addr = diff --git a/lldb/source/Interpreter/OptionValueProperties.cpp b/lldb/source/Interpreter/OptionValueProperties.cpp index 4dae930c3a6f4..21750cf186156 100644 --- a/lldb/source/Interpreter/OptionValueProperties.cpp +++ b/lldb/source/Interpreter/OptionValueProperties.cpp @@ -60,10 +60,10 @@ void OptionValueProperties::Initialize(const PropertyDefinitions &defs) { } void OptionValueProperties::SetValueChangedCallback( - uint32_t property_idx, OptionValueChangedCallback callback, void *baton) { + uint32_t property_idx, std::function<void()> callback) { Property *property = ProtectedGetPropertyAtIndex(property_idx); if (property) - property->SetValueChangedCallback(callback, baton); + property->SetValueChangedCallback(std::move(callback)); } void OptionValueProperties::AppendProperty(ConstString name, diff --git a/lldb/source/Interpreter/Property.cpp b/lldb/source/Interpreter/Property.cpp index 78209311e2e51..a81098373c257 100644 --- a/lldb/source/Interpreter/Property.cpp +++ b/lldb/source/Interpreter/Property.cpp @@ -292,8 +292,7 @@ void Property::DumpDescription(CommandInterpreter &interpreter, Stream &strm, } } -void Property::SetValueChangedCallback(OptionValueChangedCallback callback, - void *baton) { +void Property::SetValueChangedCallback(std::function<void()> callback) { if (m_value_sp) - m_value_sp->SetValueChangedCallback(callback, baton); + m_value_sp->SetValueChangedCallback(std::move(callback)); } diff --git a/lldb/source/Plugins/ScriptInterpreter/Lua/Lua.cpp b/lldb/source/Plugins/ScriptInterpreter/Lua/Lua.cpp index dc64139fa4e5c..1dd0a9eade0c8 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Lua/Lua.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Lua/Lua.cpp @@ -10,9 +10,9 @@ #include "llvm/Support/FormatVariadic.h" using namespace lldb_private; +using namespace lldb; llvm::Error Lua::Run(llvm::StringRef buffer) { - std::lock_guard<std::mutex> lock(m_mutex); int error = luaL_loadbuffer(m_lua_state, buffer.data(), buffer.size(), "buffer") || lua_pcall(m_lua_state, 0, 0, 0); diff --git a/lldb/source/Plugins/ScriptInterpreter/Lua/Lua.h b/lldb/source/Plugins/ScriptInterpreter/Lua/Lua.h index ed1d159590ac5..adc6c61184367 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Lua/Lua.h +++ b/lldb/source/Plugins/ScriptInterpreter/Lua/Lua.h @@ -9,6 +9,7 @@ #ifndef liblldb_Lua_h_ #define liblldb_Lua_h_ +#include "lldb/lldb-types.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" @@ -38,7 +39,6 @@ class Lua { llvm::Error Run(llvm::StringRef buffer); private: - std::mutex m_mutex; lua_State *m_lua_state; }; diff --git a/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp b/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp index d5423b78b8c43..e46851c450920 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp @@ -27,7 +27,13 @@ class IOHandlerLuaInterpreter : public IOHandlerDelegate, : IOHandlerEditline(debugger, IOHandler::Type::LuaInterpreter, "lua", ">>> ", "..> ", true,
debugger.GetUseColor(), 0, *this, nullptr), - m_script_interpreter(script_interpreter) {} + m_script_interpreter(script_interpreter) { + llvm::cantFail(m_script_interpreter.EnterSession(debugger.GetID())); + } + + ~IOHandlerLuaInterpreter() { + llvm::cantFail(m_script_interpreter.LeaveSession()); + } void IOHandlerInputComplete(IOHandler &io_handler, std::string &data) override { @@ -89,6 +95,33 @@ void ScriptInterpreterLua::Initialize() { void ScriptInterpreterLua::Terminate() {} +llvm::Error ScriptInterpreterLua::EnterSession(user_id_t debugger_id) { + if (m_session_is_active) + return llvm::Error::success(); + + m_session_is_active = true; + + const char *fmt_str = + "lldb.debugger = lldb.SBDebugger.FindDebuggerWithID({0}); " + "lldb.target = lldb.debugger:GetSelectedTarget(); " + "lldb.process = lldb.target:GetProcess(); " + "lldb.thread = lldb.process:GetSelectedThread(); " + "lldb.frame = lldb.thread:GetSelectedFrame()"; + return m_lua->Run(llvm::formatv(fmt_str, debugger_id).str()); +} + +llvm::Error ScriptInterpreterLua::LeaveSession() { + if (!m_session_is_active) + return llvm::Error::success(); + + m_session_is_active = false; + + llvm::StringRef str = "lldb.debugger = nil; " + "lldb.target = nil; " + "lldb.process = nil; " + "lldb.thread = nil; " + "lldb.frame = nil"; + return m_lua->Run(str); +} + lldb::ScriptInterpreterSP ScriptInterpreterLua::CreateInstance(Debugger &debugger) { return std::make_shared<ScriptInterpreterLua>(debugger); diff --git a/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.h b/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.h index b34c7d0e82176..550e1035567ca 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.h +++ b/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.h @@ -43,8 +43,12 @@ class ScriptInterpreterLua : public ScriptInterpreter { Lua &GetLua(); + llvm::Error EnterSession(lldb::user_id_t debugger_id); + llvm::Error LeaveSession(); + private: std::unique_ptr<Lua> m_lua; + bool m_session_is_active = false; }; } // namespace lldb_private
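EnterSession seeds the Lua globals with the same convenience objects the Python interpreter exposes. For comparison, a sketch of what the formatted string above computes, written against the Python SB API (the debugger id 1 is a placeholder):

    import lldb

    dbg = lldb.SBDebugger.FindDebuggerWithID(1)  # placeholder id
    target = dbg.GetSelectedTarget()
    process = target.GetProcess()
    thread = process.GetSelectedThread()
    frame = thread.GetSelectedFrame()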
*DWARFASTParserClang::ParseFunctionFromDWARF(CompileUnit &comp_unit, const DWARFDIE &die) { DWARFRangeList func_ranges; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h index 75647dbb082f2..454637ef981c7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h @@ -24,8 +24,8 @@ class DWARFCompileUnit : public DWARFUnit { DWARFCompileUnit(SymbolFileDWARF &dwarf, lldb::user_id_t uid, const DWARFUnitHeader &header, const DWARFAbbreviationDeclarationSet &abbrevs, - DIERef::Section section) - : DWARFUnit(dwarf, uid, header, abbrevs, section) {} + DIERef::Section section, bool is_dwo) + : DWARFUnit(dwarf, uid, header, abbrevs, section, is_dwo) {} DISALLOW_COPY_AND_ASSIGN(DWARFCompileUnit); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h index add0423840397..24baac90aa445 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h @@ -41,8 +41,6 @@ class DWARFContext { SectionData m_data_debug_str_offsets; SectionData m_data_debug_types; - bool isDwo() { return m_dwo_section_list != nullptr; } - const DWARFDataExtractor & LoadOrGetSection(lldb::SectionType main_section_type, llvm::Optional dwo_section_type, @@ -67,6 +65,8 @@ class DWARFContext { const DWARFDataExtractor &getOrLoadStrOffsetsData(); const DWARFDataExtractor &getOrLoadDebugTypesData(); + bool isDwo() { return m_dwo_section_list != nullptr; } + llvm::DWARFContext &GetAsLLVM(); }; } // namespace lldb_private diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp index 5612c59059bed..5b95912909ee9 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp @@ -344,7 +344,7 @@ bool DWARFDebugInfoEntry::GetDIENamesAndRanges( *frame_base = DWARFExpression( module, DataExtractor(data, block_offset, block_length), cu); } else { - DataExtractor data = dwarf.DebugLocData(); + DataExtractor data = cu->GetLocationData(); const dw_offset_t offset = form_value.Unsigned(); if (data.ValidOffset(offset)) { data = DataExtractor(data, offset, data.GetByteSize() - offset); @@ -478,8 +478,6 @@ void DWARFDebugInfoEntry::DumpAttribute( s.PutCString("( "); - SymbolFileDWARF &dwarf = cu->GetSymbolFileDWARF(); - // Check to see if we have any special attribute formatters switch (attr) { case DW_AT_stmt_list: @@ -509,7 +507,7 @@ void DWARFDebugInfoEntry::DumpAttribute( // We have a location list offset as the value that is the offset into // the .debug_loc section that describes the value over it's lifetime uint64_t debug_loc_offset = form_value.Unsigned(); - DWARFExpression::PrintDWARFLocationList(s, cu, dwarf.DebugLocData(), + DWARFExpression::PrintDWARFLocationList(s, cu, cu->GetLocationData(), debug_loc_offset); } } break; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp index 5cab4cef143e0..f660cc32b3f8d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp @@ -602,101 +602,6 @@ bool DWARFFormValue::IsDataForm(const dw_form_t form) { return false; } -int DWARFFormValue::Compare(const DWARFFormValue &a_value, - const DWARFFormValue &b_value) { - dw_form_t a_form 
= a_value.Form(); - dw_form_t b_form = b_value.Form(); - if (a_form < b_form) - return -1; - if (a_form > b_form) - return 1; - switch (a_form) { - case DW_FORM_addr: - case DW_FORM_addrx: - case DW_FORM_flag: - case DW_FORM_data1: - case DW_FORM_data2: - case DW_FORM_data4: - case DW_FORM_data8: - case DW_FORM_udata: - case DW_FORM_ref_addr: - case DW_FORM_sec_offset: - case DW_FORM_flag_present: - case DW_FORM_ref_sig8: - case DW_FORM_GNU_addr_index: { - uint64_t a = a_value.Unsigned(); - uint64_t b = b_value.Unsigned(); - if (a < b) - return -1; - if (a > b) - return 1; - return 0; - } - - case DW_FORM_sdata: { - int64_t a = a_value.Signed(); - int64_t b = b_value.Signed(); - if (a < b) - return -1; - if (a > b) - return 1; - return 0; - } - - case DW_FORM_string: - case DW_FORM_strp: - case DW_FORM_GNU_str_index: { - const char *a_string = a_value.AsCString(); - const char *b_string = b_value.AsCString(); - if (a_string == b_string) - return 0; - else if (a_string && b_string) - return strcmp(a_string, b_string); - else if (a_string == nullptr) - return -1; // A string is NULL, and B is valid - else - return 1; // A string valid, and B is NULL - } - - case DW_FORM_block: - case DW_FORM_block1: - case DW_FORM_block2: - case DW_FORM_block4: - case DW_FORM_exprloc: { - uint64_t a_len = a_value.Unsigned(); - uint64_t b_len = b_value.Unsigned(); - if (a_len < b_len) - return -1; - if (a_len > b_len) - return 1; - // The block lengths are the same - return memcmp(a_value.BlockData(), b_value.BlockData(), a_value.Unsigned()); - } break; - - case DW_FORM_ref1: - case DW_FORM_ref2: - case DW_FORM_ref4: - case DW_FORM_ref8: - case DW_FORM_ref_udata: { - uint64_t a = a_value.m_value.value.uval; - uint64_t b = b_value.m_value.value.uval; - if (a < b) - return -1; - if (a > b) - return 1; - return 0; - } - - case DW_FORM_indirect: - llvm_unreachable( - "This shouldn't happen after the form has been extracted..."); - - default: - llvm_unreachable("Unhandled DW_FORM"); - } - return -1; -} - bool DWARFFormValue::FormIsSupported(dw_form_t form) { switch (form) { case DW_FORM_addr: diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h index 6ff73ecd8efa3..8967509c081aa 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h @@ -28,8 +28,8 @@ class DWARFTypeUnit : public DWARFUnit { DWARFTypeUnit(SymbolFileDWARF &dwarf, lldb::user_id_t uid, const DWARFUnitHeader &header, const DWARFAbbreviationDeclarationSet &abbrevs, - DIERef::Section section) - : DWARFUnit(dwarf, uid, header, abbrevs, section) {} + DIERef::Section section, bool is_dwo) + : DWARFUnit(dwarf, uid, header, abbrevs, section, is_dwo) {} friend class DWARFUnit; }; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index d8d70bae0232d..dcb38da3c43ee 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -32,9 +32,9 @@ extern int g_verbose; DWARFUnit::DWARFUnit(SymbolFileDWARF &dwarf, lldb::user_id_t uid, const DWARFUnitHeader &header, const DWARFAbbreviationDeclarationSet &abbrevs, - DIERef::Section section) + DIERef::Section section, bool is_dwo) : UserID(uid), m_dwarf(dwarf), m_header(header), m_abbrevs(&abbrevs), - m_cancel_scopes(false), m_section(section) {} + m_cancel_scopes(false), m_section(section), m_is_dwo(is_dwo) {} DWARFUnit::~DWARFUnit() = default; @@ -336,6 
+336,9 @@ void DWARFUnit::AddUnitDIE(const DWARFDebugInfoEntry &cu_die) { } } + if (m_is_dwo) + return; + std::unique_ptr<SymbolFileDWARFDwo> dwo_symbol_file = m_dwarf.GetDwoSymbolFileForCompileUnit(*this, cu_die); if (!dwo_symbol_file) @@ -459,6 +462,22 @@ void DWARFUnit::SetLoclistsBase(dw_addr_t loclists_base) { } } +std::unique_ptr<llvm::DWARFLocationTable> +DWARFUnit::GetLocationTable(const DataExtractor &data) const { + llvm::DWARFDataExtractor llvm_data( + toStringRef(data.GetData()), + data.GetByteOrder() == lldb::eByteOrderLittle, data.GetAddressByteSize()); + + if (m_is_dwo || GetVersion() >= 5) + return std::make_unique<llvm::DWARFDebugLoclists>(llvm_data, GetVersion()); + return std::make_unique<llvm::DWARFDebugLoc>(llvm_data); +} + +const DWARFDataExtractor &DWARFUnit::GetLocationData() const { + return GetVersion() >= 5 ? GetSymbolFileDWARF().get_debug_loclists_data() : GetSymbolFileDWARF().get_debug_loc_data(); +} + void DWARFUnit::SetRangesBase(dw_addr_t ranges_base) { m_ranges_base = ranges_base; @@ -872,11 +891,12 @@ DWARFUnit::extract(SymbolFileDWARF &dwarf, user_id_t uid, return llvm::make_error<llvm::object::GenericBinaryError>( "No abbrev exists at the specified offset."); + bool is_dwo = dwarf.GetDWARFContext().isDwo(); if (expected_header->IsTypeUnit()) - return DWARFUnitSP( - new DWARFTypeUnit(dwarf, uid, *expected_header, *abbrevs, section)); - return DWARFUnitSP( - new DWARFCompileUnit(dwarf, uid, *expected_header, *abbrevs, section)); + return DWARFUnitSP(new DWARFTypeUnit(dwarf, uid, *expected_header, *abbrevs, + section, is_dwo)); + return DWARFUnitSP(new DWARFCompileUnit(dwarf, uid, *expected_header, + *abbrevs, section, is_dwo)); } const lldb_private::DWARFDataExtractor &DWARFUnit::GetData() const { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index d53ed756fe05d..6bee4ab8be8e7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -243,11 +243,18 @@ class DWARFUnit : public lldb_private::UserID { return *Offset + m_loclists_base; } + /// Return the location table for parsing the given location list data. The + /// format is chosen according to the unit type. Never returns null.
+ std::unique_ptr + GetLocationTable(const lldb_private::DataExtractor &data) const; + + const lldb_private::DWARFDataExtractor &GetLocationData() const; + protected: DWARFUnit(SymbolFileDWARF &dwarf, lldb::user_id_t uid, const DWARFUnitHeader &header, const DWARFAbbreviationDeclarationSet &abbrevs, - DIERef::Section section); + DIERef::Section section, bool is_dwo); llvm::Error ExtractHeader(SymbolFileDWARF &dwarf, const lldb_private::DWARFDataExtractor &data, @@ -314,6 +321,7 @@ class DWARFUnit : public lldb_private::UserID { llvm::Optional m_loclist_table_header; const DIERef::Section m_section; + bool m_is_dwo; private: void ParseProducerInfo(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 53339ea31e71a..0792260e36fe5 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -597,13 +597,6 @@ void SymbolFileDWARF::LoadSectionData(lldb::SectionType sect_type, m_objfile_sp->ReadSectionData(section_sp.get(), data); } -const DWARFDataExtractor &SymbolFileDWARF::DebugLocData() { - const DWARFDataExtractor &debugLocData = get_debug_loc_data(); - if (debugLocData.GetByteSize() > 0) - return debugLocData; - return get_debug_loclists_data(); -} - const DWARFDataExtractor &SymbolFileDWARF::get_debug_loc_data() { return GetCachedSectionData(eSectionTypeDWARFDebugLoc, m_data_debug_loc); } @@ -3361,7 +3354,7 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc, module, DataExtractor(data, block_offset, block_length), die.GetCU()); } else { - DataExtractor data = DebugLocData(); + DataExtractor data = die.GetCU()->GetLocationData(); dw_offset_t offset = form_value.Unsigned(); if (form_value.Form() == DW_FORM_loclistx) offset = die.GetCU()->GetLoclistOffset(offset).getValueOr(-1); @@ -3978,13 +3971,6 @@ SymbolFileDWARFDebugMap *SymbolFileDWARF::GetDebugMapSymfile() { return m_debug_map_symfile; } -DWARFExpression::LocationListFormat -SymbolFileDWARF::GetLocationListFormat() const { - if (m_data_debug_loclists.m_data.GetByteSize() > 0) - return DWARFExpression::LocLists; - return DWARFExpression::RegularLocationList; -} - SymbolFileDWARFDwp *SymbolFileDWARF::GetDwpSymbolFile() { llvm::call_once(m_dwp_symfile_once_flag, [this]() { ModuleSpec module_spec; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index 35b18f4b02b35..f816dd77800e4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -21,7 +21,6 @@ #include "lldb/Core/UniqueCStringMap.h" #include "lldb/Core/dwarf.h" -#include "lldb/Expression/DWARFExpression.h" #include "lldb/Symbol/DebugMacros.h" #include "lldb/Symbol/SymbolContext.h" #include "lldb/Symbol/SymbolFile.h" @@ -236,8 +235,6 @@ class SymbolFileDWARF : public lldb_private::SymbolFile, DWARFDebugRanges *GetDebugRanges(); - const lldb_private::DWARFDataExtractor &DebugLocData(); - static bool SupportedVersion(uint16_t version); DWARFDIE @@ -260,9 +257,6 @@ class SymbolFileDWARF : public lldb_private::SymbolFile, virtual lldb::CompUnitSP ParseCompileUnit(DWARFCompileUnit &dwarf_cu); - virtual lldb_private::DWARFExpression::LocationListFormat - GetLocationListFormat() const; - lldb::ModuleSP GetExternalModule(lldb_private::ConstString name); typedef std::map @@ -287,7 +281,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFile, lldb::user_id_t 
GetUID(DIERef ref); - virtual std::unique_ptr + std::unique_ptr GetDwoSymbolFileForCompileUnit(DWARFUnit &dwarf_cu, const DWARFDebugInfoEntry &cu_die); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp index 331417fe5cd12..f75f06f31e2da 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp @@ -137,13 +137,6 @@ SymbolFileDWARF &SymbolFileDWARFDwo::GetBaseSymbolFile() { return m_base_dwarf_cu.GetSymbolFileDWARF(); } -DWARFExpression::LocationListFormat -SymbolFileDWARFDwo::GetLocationListFormat() const { - return m_base_dwarf_cu.GetVersion() >= 5 - ? DWARFExpression::LocLists - : DWARFExpression::SplitDwarfLocationList; -} - llvm::Expected SymbolFileDWARFDwo::GetTypeSystemForLanguage(LanguageType language) { return GetBaseSymbolFile().GetTypeSystemForLanguage(language); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h index d07209784dd7e..0855dba044e4f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h @@ -35,9 +35,6 @@ class SymbolFileDWARFDwo : public SymbolFileDWARF { DWARFUnit * GetDWARFCompileUnit(lldb_private::CompileUnit *comp_unit) override; - lldb_private::DWARFExpression::LocationListFormat - GetLocationListFormat() const override; - size_t GetObjCMethodDIEOffsets(lldb_private::ConstString class_name, DIEArray &method_die_offsets) override; @@ -47,12 +44,6 @@ class SymbolFileDWARFDwo : public SymbolFileDWARF { DWARFDIE GetDIE(const DIERef &die_ref) override; - std::unique_ptr - GetDwoSymbolFileForCompileUnit(DWARFUnit &dwarf_cu, - const DWARFDebugInfoEntry &cu_die) override { - return nullptr; - } - DWARFCompileUnit *GetBaseCompileUnit() override { return &m_base_dwarf_cu; } llvm::Optional GetDwoNum() override { return GetID() >> 32; } diff --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp index c392317df0066..e92585ccfed70 100644 --- a/lldb/source/Symbol/Function.cpp +++ b/lldb/source/Symbol/Function.cpp @@ -76,16 +76,6 @@ InlineFunctionInfo::InlineFunctionInfo(ConstString name, InlineFunctionInfo::~InlineFunctionInfo() {} -int InlineFunctionInfo::Compare(const InlineFunctionInfo &a, - const InlineFunctionInfo &b) { - - int result = FunctionInfo::Compare(a, b); - if (result) - return result; - // only compare the mangled names if both have them - return Mangled::Compare(a.m_mangled, a.m_mangled); -} - void InlineFunctionInfo::Dump(Stream *s, bool show_fullpaths) const { FunctionInfo::Dump(s, show_fullpaths); if (m_mangled) diff --git a/lldb/source/Symbol/Type.cpp b/lldb/source/Symbol/Type.cpp index aeca76f7d05f8..f194356a0a079 100644 --- a/lldb/source/Symbol/Type.cpp +++ b/lldb/source/Symbol/Type.cpp @@ -656,17 +656,6 @@ CompilerType Type::GetForwardCompilerType() { return m_compiler_type; } -int Type::Compare(const Type &a, const Type &b) { - // Just compare the UID values for now... 
- lldb::user_id_t a_uid = a.GetID(); - lldb::user_id_t b_uid = b.GetID(); - if (a_uid < b_uid) - return -1; - if (a_uid > b_uid) - return 1; - return 0; -} - ConstString Type::GetQualifiedName() { return GetForwardCompilerType().GetConstTypeName(); } diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index a8fb32dafa898..6711dc37eca63 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -137,19 +137,12 @@ ProcessProperties::ProcessProperties(lldb_private::Process *process) Process::GetGlobalProperties().get()); m_collection_sp->SetValueChangedCallback( ePropertyPythonOSPluginPath, - ProcessProperties::OptionValueChangedCallback, this); + [this] { m_process->LoadOperatingSystemPlugin(true); }); } } ProcessProperties::~ProcessProperties() = default; -void ProcessProperties::OptionValueChangedCallback(void *baton, - OptionValue *option_value) { - ProcessProperties *properties = (ProcessProperties *)baton; - if (properties->m_process) - properties->m_process->LoadOperatingSystemPlugin(true); -} - bool ProcessProperties::GetDisableMemoryCache() const { const uint32_t idx = ePropertyDisableMemCache; return m_collection_sp->GetPropertyAtIndexAsBoolean( diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index e35a10a3f6bf8..83e6f3062666a 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -3461,29 +3461,24 @@ TargetProperties::TargetProperties(Target *target) // Set callbacks to update launch_info whenever "settins set" updated any // of these properties m_collection_sp->SetValueChangedCallback( - ePropertyArg0, TargetProperties::Arg0ValueChangedCallback, this); + ePropertyArg0, [this] { Arg0ValueChangedCallback(); }); m_collection_sp->SetValueChangedCallback( - ePropertyRunArgs, TargetProperties::RunArgsValueChangedCallback, this); + ePropertyRunArgs, [this] { RunArgsValueChangedCallback(); }); m_collection_sp->SetValueChangedCallback( - ePropertyEnvVars, TargetProperties::EnvVarsValueChangedCallback, this); + ePropertyEnvVars, [this] { EnvVarsValueChangedCallback(); }); m_collection_sp->SetValueChangedCallback( - ePropertyInputPath, TargetProperties::InputPathValueChangedCallback, - this); + ePropertyInputPath, [this] { InputPathValueChangedCallback(); }); m_collection_sp->SetValueChangedCallback( - ePropertyOutputPath, TargetProperties::OutputPathValueChangedCallback, - this); + ePropertyOutputPath, [this] { OutputPathValueChangedCallback(); }); m_collection_sp->SetValueChangedCallback( - ePropertyErrorPath, TargetProperties::ErrorPathValueChangedCallback, - this); + ePropertyErrorPath, [this] { ErrorPathValueChangedCallback(); }); + m_collection_sp->SetValueChangedCallback(ePropertyDetachOnError, [this] { + DetachOnErrorValueChangedCallback(); + }); m_collection_sp->SetValueChangedCallback( - ePropertyDetachOnError, - TargetProperties::DetachOnErrorValueChangedCallback, this); + ePropertyDisableASLR, [this] { DisableASLRValueChangedCallback(); }); m_collection_sp->SetValueChangedCallback( - ePropertyDisableASLR, TargetProperties::DisableASLRValueChangedCallback, - this); - m_collection_sp->SetValueChangedCallback( - ePropertyDisableSTDIO, - TargetProperties::DisableSTDIOValueChangedCallback, this); + ePropertyDisableSTDIO, [this] { DisableSTDIOValueChangedCallback(); }); m_experimental_properties_up.reset(new TargetExperimentalProperties()); m_collection_sp->AppendProperty( @@ -3493,16 +3488,16 @@ TargetProperties::TargetProperties(Target *target) true, 
m_experimental_properties_up->GetValueProperties()); // Update m_launch_info once it was created - Arg0ValueChangedCallback(this, nullptr); - RunArgsValueChangedCallback(this, nullptr); - // EnvVarsValueChangedCallback(this, nullptr); // FIXME: cause segfault in + Arg0ValueChangedCallback(); + RunArgsValueChangedCallback(); + // EnvVarsValueChangedCallback(); // FIXME: cause segfault in // Target::GetPlatform() - InputPathValueChangedCallback(this, nullptr); - OutputPathValueChangedCallback(this, nullptr); - ErrorPathValueChangedCallback(this, nullptr); - DetachOnErrorValueChangedCallback(this, nullptr); - DisableASLRValueChangedCallback(this, nullptr); - DisableSTDIOValueChangedCallback(this, nullptr); + InputPathValueChangedCallback(); + OutputPathValueChangedCallback(); + ErrorPathValueChangedCallback(); + DetachOnErrorValueChangedCallback(); + DisableASLRValueChangedCallback(); + DisableSTDIOValueChangedCallback(); } else { m_collection_sp = std::make_shared(ConstString("target")); @@ -3975,81 +3970,54 @@ void TargetProperties::SetRequireHardwareBreakpoints(bool b) { m_collection_sp->SetPropertyAtIndexAsBoolean(nullptr, idx, b); } -void TargetProperties::Arg0ValueChangedCallback(void *target_property_ptr, - OptionValue *) { - TargetProperties *this_ = - static_cast(target_property_ptr); - this_->m_launch_info.SetArg0(this_->GetArg0()); +void TargetProperties::Arg0ValueChangedCallback() { + m_launch_info.SetArg0(GetArg0()); } -void TargetProperties::RunArgsValueChangedCallback(void *target_property_ptr, - OptionValue *) { - TargetProperties *this_ = - static_cast(target_property_ptr); +void TargetProperties::RunArgsValueChangedCallback() { Args args; - if (this_->GetRunArguments(args)) - this_->m_launch_info.GetArguments() = args; + if (GetRunArguments(args)) + m_launch_info.GetArguments() = args; } -void TargetProperties::EnvVarsValueChangedCallback(void *target_property_ptr, - OptionValue *) { - TargetProperties *this_ = - static_cast(target_property_ptr); - this_->m_launch_info.GetEnvironment() = this_->GetEnvironment(); +void TargetProperties::EnvVarsValueChangedCallback() { + m_launch_info.GetEnvironment() = GetEnvironment(); } -void TargetProperties::InputPathValueChangedCallback(void *target_property_ptr, - OptionValue *) { - TargetProperties *this_ = - static_cast(target_property_ptr); - this_->m_launch_info.AppendOpenFileAction( - STDIN_FILENO, this_->GetStandardInputPath(), true, false); +void TargetProperties::InputPathValueChangedCallback() { + m_launch_info.AppendOpenFileAction(STDIN_FILENO, GetStandardInputPath(), true, + false); } -void TargetProperties::OutputPathValueChangedCallback(void *target_property_ptr, - OptionValue *) { - TargetProperties *this_ = - static_cast(target_property_ptr); - this_->m_launch_info.AppendOpenFileAction( - STDOUT_FILENO, this_->GetStandardOutputPath(), false, true); +void TargetProperties::OutputPathValueChangedCallback() { + m_launch_info.AppendOpenFileAction(STDOUT_FILENO, GetStandardOutputPath(), + false, true); } -void TargetProperties::ErrorPathValueChangedCallback(void *target_property_ptr, - OptionValue *) { - TargetProperties *this_ = - static_cast(target_property_ptr); - this_->m_launch_info.AppendOpenFileAction( - STDERR_FILENO, this_->GetStandardErrorPath(), false, true); +void TargetProperties::ErrorPathValueChangedCallback() { + m_launch_info.AppendOpenFileAction(STDERR_FILENO, GetStandardErrorPath(), + false, true); } -void TargetProperties::DetachOnErrorValueChangedCallback( - void *target_property_ptr, OptionValue *) { - 
TargetProperties *this_ = - static_cast(target_property_ptr); - if (this_->GetDetachOnError()) - this_->m_launch_info.GetFlags().Set(lldb::eLaunchFlagDetachOnError); +void TargetProperties::DetachOnErrorValueChangedCallback() { + if (GetDetachOnError()) + m_launch_info.GetFlags().Set(lldb::eLaunchFlagDetachOnError); else - this_->m_launch_info.GetFlags().Clear(lldb::eLaunchFlagDetachOnError); + m_launch_info.GetFlags().Clear(lldb::eLaunchFlagDetachOnError); } -void TargetProperties::DisableASLRValueChangedCallback( - void *target_property_ptr, OptionValue *) { - TargetProperties *this_ = - static_cast(target_property_ptr); - if (this_->GetDisableASLR()) - this_->m_launch_info.GetFlags().Set(lldb::eLaunchFlagDisableASLR); +void TargetProperties::DisableASLRValueChangedCallback() { + if (GetDisableASLR()) + m_launch_info.GetFlags().Set(lldb::eLaunchFlagDisableASLR); else - this_->m_launch_info.GetFlags().Clear(lldb::eLaunchFlagDisableASLR); + m_launch_info.GetFlags().Clear(lldb::eLaunchFlagDisableASLR); } -void TargetProperties::DisableSTDIOValueChangedCallback( - void *target_property_ptr, OptionValue *) { - TargetProperties *this_ = - static_cast(target_property_ptr); - if (this_->GetDisableSTDIO()) - this_->m_launch_info.GetFlags().Set(lldb::eLaunchFlagDisableSTDIO); +void TargetProperties::DisableSTDIOValueChangedCallback() { + if (GetDisableSTDIO()) + m_launch_info.GetFlags().Set(lldb::eLaunchFlagDisableSTDIO); else - this_->m_launch_info.GetFlags().Clear(lldb::eLaunchFlagDisableSTDIO); + m_launch_info.GetFlags().Clear(lldb::eLaunchFlagDisableSTDIO); } // Target::TargetEventData diff --git a/lldb/test/Shell/ScriptInterpreter/Lua/Inputs/independent_state.in b/lldb/test/Shell/ScriptInterpreter/Lua/Inputs/independent_state.in new file mode 100644 index 0000000000000..6e15a8ff663ec --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Lua/Inputs/independent_state.in @@ -0,0 +1,6 @@ +script foobar = 40 + 7 +script print(foobar) +script d = lldb.SBDebugger.Create() +script d:HandleCommand("script foobar = 40 + 2") +script print(foobar) +script d:HandleCommand("script print(foobar)") diff --git a/lldb/test/Shell/ScriptInterpreter/Lua/Inputs/nested_sessions.in b/lldb/test/Shell/ScriptInterpreter/Lua/Inputs/nested_sessions.in new file mode 100644 index 0000000000000..75c57e364cac7 --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Lua/Inputs/nested_sessions.in @@ -0,0 +1,6 @@ +script +print(lldb.target, lldb.debugger:GetSelectedTarget()) +lldb.debugger:SetSelectedTarget(lldb.debugger:GetTargetAtIndex(0)) +print(lldb.target, lldb.debugger:GetSelectedTarget()) +lldb.debugger:HandleCommand("script print(lldb.target, lldb.debugger:GetSelectedTarget())") +print(lldb.target, lldb.debugger:GetSelectedTarget()) diff --git a/lldb/test/Shell/ScriptInterpreter/Lua/Inputs/nested_sessions_2.in b/lldb/test/Shell/ScriptInterpreter/Lua/Inputs/nested_sessions_2.in new file mode 100644 index 0000000000000..a8cc2a57a55b3 --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Lua/Inputs/nested_sessions_2.in @@ -0,0 +1,2 @@ +script +print(lldb.target, lldb.debugger:GetSelectedTarget()) diff --git a/lldb/test/Shell/ScriptInterpreter/Lua/convenience_variables.test b/lldb/test/Shell/ScriptInterpreter/Lua/convenience_variables.test new file mode 100644 index 0000000000000..022f2e38db49a --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Lua/convenience_variables.test @@ -0,0 +1,17 @@ +# REQUIRES: lua +# +# This tests that the convenience variables are not nil. 
Given that there is no +# target we only expect the debugger to be valid. +# +# RUN: cat %s | %lldb --script-language lua 2>&1 | FileCheck %s +script +print(string.format("lldb.debugger is valid: %s", lldb.debugger:IsValid())) +print(string.format("lldb.target is valid: %s", lldb.target:IsValid())) +print(string.format("lldb.process is valid: %s", lldb.process:IsValid())) +print(string.format("lldb.thread is valid: %s", lldb.thread:IsValid())) +print(string.format("lldb.frame is valid: %s", lldb.frame:IsValid())) +# CHECK: debugger is valid: true +# CHECK: target is valid: false +# CHECK: process is valid: false +# CHECK: thread is valid: false +# CHECK: frame is valid: false diff --git a/lldb/test/Shell/ScriptInterpreter/Lua/independent_state.test b/lldb/test/Shell/ScriptInterpreter/Lua/independent_state.test new file mode 100644 index 0000000000000..2ade1b91c1cb6 --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Lua/independent_state.test @@ -0,0 +1,6 @@ +# REQUIRES: lua +# +# RUN: %lldb --script-language lua -s %S/Inputs/independent_state.in 2>&1 | FileCheck %s +# CHECK: 47 +# CHECK: 47 +# CHECK: 42 diff --git a/lldb/test/Shell/ScriptInterpreter/Lua/nested_sessions.test b/lldb/test/Shell/ScriptInterpreter/Lua/nested_sessions.test new file mode 100644 index 0000000000000..a81418b6af61d --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Lua/nested_sessions.test @@ -0,0 +1,12 @@ +# REQUIRES: lua +# RUN: mkdir -p %t +# RUN: echo "int main() { return 0; }" | %clang_host -x c - -o %t/foo +# RUN: echo "int main() { return 0; }" | %clang_host -x c - -o %t/bar +# RUN: %lldb --script-language lua -o "file %t/bar" -o "file %t/foo" -s %S/Inputs/nested_sessions.in -s %S/Inputs/nested_sessions_2.in 2>&1 | FileCheck %s +# CHECK: script +# CHECK-NEXT: foo foo +# CHECK-NEXT: foo bar +# CHECK-NEXT: foo bar +# CHECK-NEXT: foo bar +# CHECK: script +# CHECK-NEXT: bar bar diff --git a/lldb/test/Shell/SymbolFile/DWARF/debug_loc_and_loclists.s b/lldb/test/Shell/SymbolFile/DWARF/debug_loc_and_loclists.s new file mode 100644 index 0000000000000..05bccbe78aabf --- /dev/null +++ b/lldb/test/Shell/SymbolFile/DWARF/debug_loc_and_loclists.s @@ -0,0 +1,154 @@ +# Test that we can handle DWARF 4 and 5 location lists in the same object file +# (but different compile units). 
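For orientation before the assembly: the consumer side this test drives is the per-unit table API introduced above, where DWARFExpression::GetLocationExpression obtains a table from its unit and walks the entries. A minimal sketch of that walk, assuming LLVM's DWARFLocationTable interface of the same vintage (the error handling here is illustrative only, not the patch's exact code):

  // The DWARF v4 unit hands back an llvm::DWARFDebugLoc, the v5 unit an
  // llvm::DWARFDebugLoclists; both are consumed through the same base class.
  std::unique_ptr<llvm::DWARFLocationTable> loctable_up =
      m_dwarf_cu->GetLocationTable(m_data);
  uint64_t offset = 0;
  llvm::Error error = loctable_up->visitLocationList(
      &offset, [&](const llvm::DWARFLocationEntry &entry) {
        // entry.Kind is a DW_LLE_* constant; entry.Loc holds the raw DWARF
        // expression bytes (DW_OP_reg5 and DW_OP_reg0 in this test).
        return true; // returning false stops the walk early
      });
  if (error)
    llvm::consumeError(std::move(error)); // a real caller should report this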
+ +# REQUIRES: x86 + +# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s > %t +# RUN: %lldb %t -o "image lookup -v -s loc" -o "image lookup -v -s loclists" \ +# RUN: -o exit | FileCheck %s + + +# CHECK-LABEL: image lookup -v -s loc +# CHECK: Variable: {{.*}}, name = "x0", type = "int", location = DW_OP_reg5 RDI, + +# CHECK-LABEL: image lookup -v -s loclists +# CHECK: Variable: {{.*}}, name = "x1", type = "int", location = DW_OP_reg0 RAX, + + +loc: + nop +.Lloc_end: + +loclists: + nop +.Lloclists_end: + + .section .debug_loc,"",@progbits +.Lloc_list: + .quad loc-loc + .quad .Lloc_end-loc + .short 1 + .byte 85 # super-register DW_OP_reg5 + .quad 0 + .quad 0 + + .section .debug_loclists,"",@progbits + .long .Ldebug_loclist_table_end0-.Ldebug_loclist_table_start0 # Length +.Ldebug_loclist_table_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 0 # Offset entry count + +.Lloclists_list: + .byte 4 # DW_LLE_offset_pair + .uleb128 loclists-loclists + .uleb128 .Lloclists_end-loclists + .uleb128 1 + .byte 80 # super-register DW_OP_reg0 + .byte 0 # DW_LLE_end_of_list +.Ldebug_loclist_table_end0: + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 8 # DW_FORM_string + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 73 # DW_AT_type + .byte 16 # DW_FORM_ref_addr + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 23 # DW_FORM_sec_offset + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 73 # DW_AT_type + .byte 16 # DW_FORM_ref_addr + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + .section .debug_info,"",@progbits + .long .Lloc_cu_end-.Lloc_cu_start # Length of Unit +.Lloc_cu_start: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x50 DW_TAG_compile_unit + .asciz "Hand-written DWARF" # DW_AT_producer + .short 12 # DW_AT_language + .quad loc # DW_AT_low_pc + .long .Lloc_end-loc # DW_AT_high_pc + .byte 2 # Abbrev [2] 0x2a:0x29 DW_TAG_subprogram + .quad loc # DW_AT_low_pc + .long .Lloc_end-loc # DW_AT_high_pc + .asciz "loc" # DW_AT_name + .long .Lint # DW_AT_type + .byte 3 # Abbrev [3] DW_TAG_formal_parameter + .long .Lloc_list # DW_AT_location + .asciz "x0" # DW_AT_name + .long .Lint # DW_AT_type + .byte 0 # End Of Children Mark +.Lint: + .byte 4 # Abbrev [4] 0x53:0x7 DW_TAG_base_type + .asciz "int" # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Lloc_cu_end: + + .long .Lloclists_cu_end-.Lloclists_cu_start # Length of Unit +.Lloclists_cu_start: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 1 # Abbrev [1] 0xb:0x50 DW_TAG_compile_unit + .asciz "Hand-written DWARF" # DW_AT_producer + .short 12 # DW_AT_language + .quad loclists # DW_AT_low_pc + .long .Lloclists_end-loclists # DW_AT_high_pc + .byte 2 # Abbrev [2] 0x2a:0x29 DW_TAG_subprogram + .quad loclists # DW_AT_low_pc + .long .Lloclists_end-loclists # DW_AT_high_pc + .asciz "loclists" # DW_AT_name + .long .Lint # DW_AT_type + .byte 3 # Abbrev [3] DW_TAG_formal_parameter + .long .Lloclists_list # DW_AT_location + .asciz "x1" # DW_AT_name + .long .Lint # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark +.Lloclists_cu_end: diff --git a/lldb/tools/debugserver/source/MacOSX/MachProcess.mm b/lldb/tools/debugserver/source/MacOSX/MachProcess.mm index 2e952d6ad0bc7..40facdfb5cf9b 100644 --- a/lldb/tools/debugserver/source/MacOSX/MachProcess.mm +++ b/lldb/tools/debugserver/source/MacOSX/MachProcess.mm @@ -86,7 +86,7 @@ static CFStringRef CopyBundleIDForPath(const char *app_bundle_path, #if defined(WITH_BKS) || defined(WITH_FBS) #import static const int OPEN_APPLICATION_TIMEOUT_ERROR = 111; -typedef void (*SetErrorFunction)(NSInteger, DNBError &); +typedef void (*SetErrorFunction)(NSInteger, std::string, DNBError &); typedef bool (*CallOpenApplicationFunction)(NSString *bundleIDNSStr, NSDictionary *options, DNBError &error, pid_t *return_pid); @@ -122,6 +122,7 @@ static bool CallBoardSystemServiceOpenApplication(NSString *bundleIDNSStr, mach_port_t client_port = [system_service createClientPort]; __block dispatch_semaphore_t semaphore = dispatch_semaphore_create(0); __block ErrorFlavor open_app_error = no_error_enum_value; + __block std::string open_app_error_string; bool wants_pid = (return_pid != NULL); __block pid_t pid_in_block; @@ -159,6 +160,9 @@ static bool CallBoardSystemServiceOpenApplication(NSString *bundleIDNSStr, } else { const char *error_str = [(NSString *)[bks_error localizedDescription] UTF8String]; + if (error_str) { + open_app_error_string = error_str; + } DNBLogThreadedIf(LOG_PROCESS, "In completion handler for send " "event, got error \"%s\"(%ld).", error_str ? 
error_str : "", @@ -190,7 +194,7 @@ static bool CallBoardSystemServiceOpenApplication(NSString *bundleIDNSStr, error.SetError(OPEN_APPLICATION_TIMEOUT_ERROR, DNBError::Generic); error.SetErrorString("timed out trying to launch app"); } else if (open_app_error != no_error_enum_value) { - error_function(open_app_error, error); + error_function(open_app_error, open_app_error_string, error); DNBLogError("unable to launch the application with CFBundleIdentifier '%s' " "bks_error = %u", cstr, open_app_error); @@ -245,19 +249,19 @@ static bool IsBKSProcess(nub_process_t pid) { return app_state != BKSApplicationStateUnknown; } -static void SetBKSError(NSInteger error_code, DNBError &error) { +static void SetBKSError(NSInteger error_code, + std::string error_description, + DNBError &error) { error.SetError(error_code, DNBError::BackBoard); NSString *err_nsstr = ::BKSOpenApplicationErrorCodeToString( (BKSOpenApplicationErrorCode)error_code); - const char *err_str = NULL; - if (err_nsstr == NULL) - err_str = "unknown BKS error"; - else { + std::string err_str = "unknown BKS error"; + if (error_description.empty() == false) { + err_str = error_description; + } else if (err_nsstr != nullptr) { err_str = [err_nsstr UTF8String]; - if (err_str == NULL) - err_str = "unknown BKS error"; } - error.SetErrorString(err_str); + error.SetErrorString(err_str.c_str()); } static bool BKSAddEventDataToOptions(NSMutableDictionary *options, @@ -355,19 +359,19 @@ static bool IsFBSProcess(nub_process_t pid) { } #endif -static void SetFBSError(NSInteger error_code, DNBError &error) { +static void SetFBSError(NSInteger error_code, + std::string error_description, + DNBError &error) { error.SetError((DNBError::ValueType)error_code, DNBError::FrontBoard); NSString *err_nsstr = ::FBSOpenApplicationErrorCodeToString( (FBSOpenApplicationErrorCode)error_code); - const char *err_str = NULL; - if (err_nsstr == NULL) - err_str = "unknown FBS error"; - else { + std::string err_str = "unknown FBS error"; + if (error_description.empty() == false) { + err_str = error_description; + } else if (err_nsstr != nullptr) { err_str = [err_nsstr UTF8String]; - if (err_str == NULL) - err_str = "unknown FBS error"; } - error.SetErrorString(err_str); + error.SetErrorString(err_str.c_str()); } static bool FBSAddEventDataToOptions(NSMutableDictionary *options, @@ -2754,7 +2758,8 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options, "debugserver timed out waiting for openApplication to complete."); attach_err.SetError(OPEN_APPLICATION_TIMEOUT_ERROR, DNBError::Generic); } else if (attach_error_code != FBSOpenApplicationErrorCodeNone) { - SetFBSError(attach_error_code, attach_err); + std::string empty_str; + SetFBSError(attach_error_code, empty_str, attach_err); DNBLogError("unable to launch the application with CFBundleIdentifier " "'%s' bks_error = %ld", bundleIDStr.c_str(), (NSInteger)attach_error_code); @@ -2831,7 +2836,8 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options, "debugserver timed out waiting for openApplication to complete."); attach_err.SetError(OPEN_APPLICATION_TIMEOUT_ERROR, DNBError::Generic); } else if (attach_error_code != BKSOpenApplicationErrorCodeNone) { - SetBKSError(attach_error_code, attach_err); + std::string empty_str; + SetBKSError(attach_error_code, empty_str, attach_err); DNBLogError("unable to launch the application with CFBundleIdentifier " "'%s' bks_error = %ld", bundleIDStr.c_str(), attach_error_code); diff --git a/llvm/CODE_OWNERS.TXT b/llvm/CODE_OWNERS.TXT index 
df8aa0b4ef9d2..457dabe39f90c 100644 --- a/llvm/CODE_OWNERS.TXT +++ b/llvm/CODE_OWNERS.TXT @@ -150,6 +150,10 @@ N: Dylan McKay E: me@dylanmckay.io D: AVR Backend +N: Simon Moll +E: simon.moll@emea.nec.com +D: VE Backend + N: Tim Northover E: t.p.northover@gmail.com D: AArch64 backend, misc ARM backend diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index e87a08f7effff..9bfaaccd953e2 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -566,7 +566,8 @@ namespace llvm { /// /// If \p AllowInexact is false, the function will fail if the string /// cannot be represented exactly. Otherwise, the function only fails - /// in case of an overflow or underflow. + /// in case of an overflow or underflow, or an invalid floating point + /// representation. bool getAsDouble(double &Result, bool AllowInexact = true) const; /// @} diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h index e9e57b6e92208..62b905f65bd74 100644 --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -98,7 +98,8 @@ class Triple { fpga_aoco, // Intel FPGA: unlinked object file fpga_aocr, // Intel FPGA: linked early image fpga_aocx, // Intel FPGA: linked image - LastArchType = renderscript64 + ve, // NEC SX-Aurora Vector Engine + LastArchType = ve }; enum SubArchType { NoSubArch, @@ -135,7 +136,9 @@ class Triple { SPIRSubArch_fpga, SPIRSubArch_gen, - SPIRSubArch_x86_64 + SPIRSubArch_x86_64, + + PPCSubArch_spe }; enum VendorType { UnknownVendor, @@ -746,6 +749,11 @@ class Triple { return getArch() == Triple::x86 || getArch() == Triple::x86_64; } + /// Tests whether the target is VE + bool isVE() const { + return getArch() == Triple::ve; + } + /// Tests whether the target supports comdat bool supportsCOMDAT() const { return !isOSBinFormatMachO(); diff --git a/llvm/include/llvm/CodeGen/LivePhysRegs.h b/llvm/include/llvm/CodeGen/LivePhysRegs.h index 50da0b3d5c483..085893462a083 100644 --- a/llvm/include/llvm/CodeGen/LivePhysRegs.h +++ b/llvm/include/llvm/CodeGen/LivePhysRegs.h @@ -137,6 +137,9 @@ class LivePhysRegs { /// Live out registers are the union of the live-in registers of the successor /// blocks and pristine registers. Live out registers of the end block are the /// callee saved registers. + /// If a register is not added by this method, it is guaranteed to not be + /// live out from MBB, although a sub-register may be. This is true + /// both before and after regalloc. void addLiveOuts(const MachineBasicBlock &MBB); /// Adds all live-out registers of basic block \p MBB but skips pristine diff --git a/llvm/include/llvm/CodeGen/MIRFormatter.h b/llvm/include/llvm/CodeGen/MIRFormatter.h new file mode 100644 index 0000000000000..e57c32c5ae614 --- /dev/null +++ b/llvm/include/llvm/CodeGen/MIRFormatter.h @@ -0,0 +1,83 @@ +//===-- llvm/CodeGen/MIRFormatter.h -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MIRFormatter class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MIRFORMATTER_H +#define LLVM_CODEGEN_MIRFORMATTER_H + +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/PseudoSourceValue.h" + +namespace llvm { + +struct PerFunctionMIParsingState; +struct SlotMapping; + +/// MIRFormatter - Interface to format a MIR operand based on the target +class MIRFormatter { +public: + typedef function_ref<bool(StringRef::iterator Loc, const Twine &)> + ErrorCallbackType; + + MIRFormatter() {} + virtual ~MIRFormatter() = default; + + /// Implement target specific printing for a machine operand immediate value, so + /// that we can have a more meaningful mnemonic than a 64-bit integer. Passing + /// None to OpIdx means the index is unknown. + virtual void printImm(raw_ostream &OS, const MachineInstr &MI, + Optional<unsigned> OpIdx, int64_t Imm) const { + OS << Imm; + } + + /// Implement target specific parsing of immediate mnemonics. The mnemonic is a + /// dot-separated string. + virtual bool parseImmMnemonic(const unsigned OpCode, const unsigned OpIdx, + StringRef Src, int64_t &Imm, + ErrorCallbackType ErrorCallback) const { + llvm_unreachable("target did not implement parsing MIR immediate mnemonic"); + } + + /// Implement target specific printing of target custom pseudo source value. + /// Default implementation is not necessarily the correct MIR serialization + /// format. + virtual void + printCustomPseudoSourceValue(raw_ostream &OS, ModuleSlotTracker &MST, + const PseudoSourceValue &PSV) const { + PSV.printCustom(OS); + } + + /// Implement target specific parsing of target custom pseudo source value. + virtual bool parseCustomPseudoSourceValue( + StringRef Src, MachineFunction &MF, PerFunctionMIParsingState &PFS, + const PseudoSourceValue *&PSV, ErrorCallbackType ErrorCallback) const { + llvm_unreachable( + "target did not implement parsing MIR custom pseudo source value"); + } + + /// Helper function to print an IR value in MIR serialization format, which is + /// useful for a target specific printer, e.g. for printing an IR value in a + /// custom pseudo source value. + static void printIRValue(raw_ostream &OS, const Value &V, + ModuleSlotTracker &MST); + + /// Helper function to parse an IR value from MIR serialization format, which + /// is useful for a target specific parser, e.g. for parsing an IR value for a + /// custom pseudo source value. + static bool parseIRValue(StringRef Src, MachineFunction &MF, + PerFunctionMIParsingState &PFS, const Value *&V, + ErrorCallbackType ErrorCallback); +}; + +} // end namespace llvm + +#endif diff --git a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h index 4e32a04551c1c..8ca665b23b280 100644 --- a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h +++ b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h @@ -171,12 +171,16 @@ struct PerFunctionMIParsingState { DenseMap<unsigned, unsigned> ConstantPoolSlots; DenseMap<unsigned, unsigned> JumpTableSlots; + /// Maps from slot numbers to the function's unnamed values.
+ DenseMap<unsigned, const Value *> Slots2Values; + PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM, const SlotMapping &IRSlots, PerTargetMIParsingState &Target); VRegInfo &getVRegInfo(unsigned Num); VRegInfo &getVRegInfoNamed(StringRef RegName); + const Value *getIRValue(unsigned Slot); }; /// Parse the machine basic block definitions, and skip the machine diff --git a/llvm/include/llvm/CodeGen/MachineMemOperand.h b/llvm/include/llvm/CodeGen/MachineMemOperand.h index 7ee700c62a25c..b0243646b06c7 100644 --- a/llvm/include/llvm/CodeGen/MachineMemOperand.h +++ b/llvm/include/llvm/CodeGen/MachineMemOperand.h @@ -26,6 +26,7 @@ namespace llvm { class FoldingSetNodeID; class MDNode; +class MIRFormatter; class raw_ostream; class MachineFunction; class ModuleSlotTracker; @@ -295,7 +296,8 @@ class MachineMemOperand { /// @{ void print(raw_ostream &OS, ModuleSlotTracker &MST, SmallVectorImpl<StringRef> &SSNs, const LLVMContext &Context, - const MachineFrameInfo *MFI, const TargetInstrInfo *TII) const; + const MachineFrameInfo *MFI, const TargetInstrInfo *TII, + const MIRFormatter *MIRF) const; /// @} friend bool operator==(const MachineMemOperand &LHS, diff --git a/llvm/include/llvm/CodeGen/MachineOperand.h b/llvm/include/llvm/CodeGen/MachineOperand.h index df914dc2d85e8..4222c03b023a9 100644 --- a/llvm/include/llvm/CodeGen/MachineOperand.h +++ b/llvm/include/llvm/CodeGen/MachineOperand.h @@ -278,6 +278,9 @@ class MachineOperand { /// More complex way of printing a MachineOperand. /// \param TypeToPrint specifies the generic type to be printed on uses and /// defs. It can be determined using MachineInstr::getTypeToPrint. + /// \param OpIdx - specifies the index of the operand in the machine instruction. + /// This will be used by the target dependent MIR formatter. It can be None if + /// the index is unknown, e.g. when called from dump(). /// \param PrintDef - whether we want to print `def` on an operand which /// isDef. Sometimes, if the operand is printed before '=', we don't print /// `def`. @@ -294,8 +297,9 @@ class MachineOperand { /// information from it's parent. /// \param IntrinsicInfo - same as \p TRI. void print(raw_ostream &os, ModuleSlotTracker &MST, LLT TypeToPrint, - bool PrintDef, bool IsStandalone, bool ShouldPrintRegisterTies, - unsigned TiedOperandIdx, const TargetRegisterInfo *TRI, + Optional<unsigned> OpIdx, bool PrintDef, bool IsStandalone, + bool ShouldPrintRegisterTies, unsigned TiedOperandIdx, + const TargetRegisterInfo *TRI, const TargetIntrinsicInfo *IntrinsicInfo) const; /// Same as print(os, TRI, IntrinsicInfo), but allows to specify the low-level diff --git a/llvm/include/llvm/CodeGen/PseudoSourceValue.h b/llvm/include/llvm/CodeGen/PseudoSourceValue.h index 4b3cc9145a134..593a865ea5458 100644 --- a/llvm/include/llvm/CodeGen/PseudoSourceValue.h +++ b/llvm/include/llvm/CodeGen/PseudoSourceValue.h @@ -22,6 +22,7 @@ namespace llvm { class MachineFrameInfo; class MachineMemOperand; +class MIRFormatter; class raw_ostream; class TargetInstrInfo; @@ -52,6 +53,7 @@ class PseudoSourceValue { const PseudoSourceValue* PSV); friend class MachineMemOperand; // For printCustom(). + friend class MIRFormatter; // For printCustom(). /// Implement printing for PseudoSourceValue. This is called from /// Value::print or Value's operator<<.
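Because the change above only declares the hooks, here is a brief sketch of how a backend might implement the print/parse pair; the target namespace, opcode, and mnemonic are invented for illustration, and only the MIRFormatter interface itself comes from this patch:

  class MyTargetMIRFormatter : public MIRFormatter {
  public:
    // Print a symbolic mnemonic for a known immediate instead of the raw value.
    void printImm(raw_ostream &OS, const MachineInstr &MI,
                  Optional<unsigned> OpIdx, int64_t Imm) const override {
      if (MI.getOpcode() == MyTarget::CTRL_LOAD && Imm == 0x10) // hypothetical
        OS << "ctrl.status";
      else
        MIRFormatter::printImm(OS, MI, OpIdx, Imm); // default prints the integer
    }

    // The inverse mapping, used when the MIR parser sees the mnemonic.
    bool parseImmMnemonic(const unsigned OpCode, const unsigned OpIdx,
                          StringRef Src, int64_t &Imm,
                          ErrorCallbackType ErrorCallback) const override {
      if (OpCode == MyTarget::CTRL_LOAD && Src == "ctrl.status") {
        Imm = 0x10;
        return false; // MIParser convention: false means success
      }
      return ErrorCallback(Src.begin(), "unknown target immediate mnemonic");
    }
  };

A target would then expose an instance through the TargetMachine::getMIRFormatter() hook added elsewhere in this patch, so that MIR printing and parsing stay in sync.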
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h index a0beee36c7484..c7d4c4d7e5d44 100644 --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -309,13 +309,6 @@ class TargetFrameLowering { RegScavenger *RS = nullptr) const { } - /// processFunctionBeforeFrameIndicesReplaced - This method is called - /// immediately before MO_FrameIndex operands are eliminated, but after the - /// frame is finalized. This method is optional. - virtual void - processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, - RegScavenger *RS = nullptr) const {} - virtual unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const { report_fatal_error("WinEH not implemented for this target"); } diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 4b4cea30b2ba3..e410d1c4806d3 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/None.h" #include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/MIRFormatter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/MachineFunction.h" @@ -1807,6 +1808,7 @@ class TargetInstrInfo : public MCInstrInfo { Register Reg) const; private: + mutable std::unique_ptr Formatter; unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; unsigned CatchRetOpcode; unsigned ReturnOpcode; diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 35d40c67b44f2..518ad7079225e 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -773,6 +773,9 @@ class Neon_Dot_Intrinsic def int_arm_neon_udot : Neon_Dot_Intrinsic; def int_arm_neon_sdot : Neon_Dot_Intrinsic; +def int_arm_cls: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_arm_cls64: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>; + def int_arm_mve_vctp8 : Intrinsic<[llvm_v16i1_ty], [llvm_i32_ty], [IntrNoMem]>; def int_arm_mve_vctp16 : Intrinsic<[llvm_v8i1_ty], [llvm_i32_ty], [IntrNoMem]>; def int_arm_mve_vctp32 : Intrinsic<[llvm_v4i1_ty], [llvm_i32_ty], [IntrNoMem]>; @@ -881,7 +884,7 @@ defm int_arm_mve_maxv: IntrinsicSignSuffix<[llvm_i32_ty], multiclass MVEPredicated rets, list params, LLVMType pred = llvm_anyvector_ty, - list props = []> { + list props = [IntrNoMem]> { def "": Intrinsic; def _predicated: Intrinsic; } @@ -895,7 +898,7 @@ multiclass MVEPredicatedM rets, list params, } defm int_arm_mve_vcvt_narrow: MVEPredicated<[llvm_v8f16_ty], - [llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty], llvm_v4i1_ty, [IntrNoMem]>; + [llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty], llvm_v4i1_ty>; defm int_arm_mve_vldr_gather_base: MVEPredicated< [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty], @@ -1033,7 +1036,7 @@ def int_arm_mve_vmull_poly: Intrinsic< multiclass MVEMXPredicated rets, list flags, list params, LLVMType inactive, LLVMType predicate, - list props = []> { + list props = [IntrNoMem]> { def "": Intrinsic; def _predicated: Intrinsic; @@ -1047,7 +1050,7 @@ multiclass MVEMXPredicated rets, list flags, defm int_arm_mve_vcaddq : MVEMXPredicated< [llvm_anyvector_ty], [llvm_i32_ty, llvm_i32_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - LLVMMatchType<0>, llvm_anyvector_ty, [IntrNoMem]>; + LLVMMatchType<0>, llvm_anyvector_ty>; // The first operand of the following two intrinsics 
is the rotation angle // (must be a compile-time constant): @@ -1058,12 +1061,12 @@ defm int_arm_mve_vcaddq : MVEMXPredicated< defm int_arm_mve_vcmulq : MVEMXPredicated< [llvm_anyvector_ty], [llvm_i32_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - LLVMMatchType<0>, llvm_anyvector_ty, [IntrNoMem]>; + LLVMMatchType<0>, llvm_anyvector_ty>; defm int_arm_mve_vcmlaq : MVEPredicated< [llvm_anyvector_ty], [llvm_i32_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - llvm_anyvector_ty, [IntrNoMem]>; + llvm_anyvector_ty>; def int_arm_mve_vld2q: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_anyptr_ty], [IntrReadMem]>; def int_arm_mve_vld4q: Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_anyptr_ty], [IntrReadMem]>; @@ -1072,9 +1075,6 @@ def int_arm_mve_vst2q: Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMat def int_arm_mve_vst4q: Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>, llvm_i32_ty], [IntrWriteMem] >; -def int_arm_cls: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; -def int_arm_cls64: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>; - // MVE vector absolute difference and accumulate across vector // The first operand is an 'unsigned' flag. The remaining operands are: // * accumulator @@ -1083,8 +1083,7 @@ def int_arm_cls64: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>; // * mask (only in predicated versions) defm int_arm_mve_vabav: MVEPredicated< [llvm_i32_ty], - [llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], llvm_anyvector_ty, - [IntrNoMem]>; + [llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], llvm_anyvector_ty>; // The following 3 instrinsics are MVE vector reductions with two vector // operands. @@ -1107,19 +1106,19 @@ defm int_arm_mve_vmldava: MVEPredicated< [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], - llvm_anyvector_ty, [IntrNoMem]>; + llvm_anyvector_ty>; // Version with 64-bit result, vml{a,s}ldav[a][x] defm int_arm_mve_vmlldava: MVEPredicated< [llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], - llvm_anyvector_ty, [IntrNoMem]>; + llvm_anyvector_ty>; // Version with 72-bit rounded result, vrml{a,s}ldavh[a][x] defm int_arm_mve_vrmlldavha: MVEPredicated< [llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], - llvm_anyvector_ty, [IntrNoMem]>; + llvm_anyvector_ty>; } // end TargetPrefix diff --git a/llvm/include/llvm/IR/ValueHandle.h b/llvm/include/llvm/IR/ValueHandle.h index 11ac2a8608d81..50b7701f67162 100644 --- a/llvm/include/llvm/IR/ValueHandle.h +++ b/llvm/include/llvm/IR/ValueHandle.h @@ -171,6 +171,25 @@ template <> struct simplify_type<const WeakVH> { static SimpleType getSimplifiedValue(const WeakVH &WVH) { return WVH; } }; +// Specialize DenseMapInfo to allow WeakVH to participate in DenseMap. +template <> struct DenseMapInfo<WeakVH> { + static inline WeakVH getEmptyKey() { + return WeakVH(DenseMapInfo<Value *>::getEmptyKey()); + } + + static inline WeakVH getTombstoneKey() { + return WeakVH(DenseMapInfo<Value *>::getTombstoneKey()); + } + + static unsigned getHashValue(const WeakVH &Val) { + return DenseMapInfo<Value *>::getHashValue(Val); + } + + static bool isEqual(const WeakVH &LHS, const WeakVH &RHS) { + return DenseMapInfo<Value *>::isEqual(LHS, RHS); + } +}; + /// Value handle that is nullable, but tries to track the Value.
/// /// This is a value handle that tries hard to point to a Value, even across diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h index 004a6f5f6eb80..37b9669cbeed9 100644 --- a/llvm/include/llvm/Support/MathExtras.h +++ b/llvm/include/llvm/Support/MathExtras.h @@ -732,6 +732,11 @@ inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) { return alignTo(Numerator, Denominator) / Denominator; } +/// Returns the integer nearest(Numerator / Denominator). +inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) { + return (Numerator + (Denominator / 2)) / Denominator; +} + /// Returns the largest uint64_t less than or equal to \p Value and is /// \p Skew mod \p Align. \p Align must be non-zero inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { diff --git a/llvm/include/llvm/Support/Memory.h b/llvm/include/llvm/Support/Memory.h index 6f22dd7080cd5..c0454223b2fd1 100644 --- a/llvm/include/llvm/Support/Memory.h +++ b/llvm/include/llvm/Support/Memory.h @@ -57,6 +57,17 @@ namespace sys { MF_WRITE = 0x2000000, MF_EXEC = 0x4000000, MF_RWE_MASK = 0x7000000, + + /// The \p MF_HUGE_HINT flag is used to indicate that the request for + /// a memory block should be satisfied with large pages if possible. + /// This is only a hint and small pages will be used as fallback. + /// + /// The presence or absence of this flag in the returned memory block + /// is (at least currently) *not* a reliable indicator that the memory + /// block will use or will not use large pages. On some systems a request + /// without this flag can be backed by large pages without this flag being + /// set, and on some other systems a request with this flag can fallback + /// to small pages without this flag being cleared. MF_HUGE_HINT = 0x0000001 }; diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h index 176ae39b17a7c..39422ac3bf8ca 100644 --- a/llvm/include/llvm/Target/TargetMachine.h +++ b/llvm/include/llvm/Target/TargetMachine.h @@ -33,6 +33,7 @@ class MCInstrInfo; class MCRegisterInfo; class MCSubtargetInfo; class MCSymbol; +class MIRFormatter; class raw_pwrite_stream; class PassManagerBuilder; struct PerFunctionMIParsingState; @@ -94,6 +95,7 @@ class TargetMachine { std::unique_ptr MRI; std::unique_ptr MII; std::unique_ptr STI; + std::unique_ptr MIRF; unsigned RequireStructuredCFG : 1; unsigned O0WantsFastISel : 1; @@ -197,6 +199,10 @@ class TargetMachine { return nullptr; } + /// Return MIR formatter to format/parse MIR operands. Target can override + /// this virtual function and return target specific MIR formatter. + virtual const MIRFormatter *getMIRFormatter() const { return MIRF.get(); } + bool requiresStructuredCFG() const { return RequireStructuredCFG; } void setRequiresStructuredCFG(bool Value) { RequireStructuredCFG = Value; } diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 53236b54ff0f0..70642b201adb3 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -865,6 +865,13 @@ struct Attributor { ToBeChangedToUnreachableInsts.insert(I); } + /// Record that \p II has at least one dead successor block. This information + /// is used, e.g., to replace \p II with a call, after information was + /// manifested. 
+ void registerInvokeWithDeadSuccessor(InvokeInst &II) { + InvokeWithDeadSuccessor.push_back(&II); + } + /// Record that \p I is deleted after information was manifested. This also /// triggers deletion of trivially dead istructions. void deleteAfterManifest(Instruction &I) { ToBeDeletedInsts.insert(&I); } @@ -1174,7 +1181,10 @@ struct Attributor { DenseMap ToBeChangedUses; /// Instructions we replace with `unreachable` insts after manifest is done. - SmallPtrSet ToBeChangedToUnreachableInsts; + SmallDenseSet ToBeChangedToUnreachableInsts; + + /// Invoke instructions with at least a single dead successor block. + SmallVector InvokeWithDeadSuccessor; /// Functions, blocks, and instructions we delete after manifest is done. /// diff --git a/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h b/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h index 7920269b0fb27..233963528595e 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h @@ -15,9 +15,7 @@ namespace llvm { -class Loop; -struct LoopStandardAnalysisResults; -class LPMUpdater; +class Function; /// A simple loop rotation transformation. class LoopUnrollAndJamPass : public PassInfoMixin { @@ -25,8 +23,7 @@ class LoopUnrollAndJamPass : public PassInfoMixin { public: explicit LoopUnrollAndJamPass(int OptLevel = 2) : OptLevel(OptLevel) {} - PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, - LoopStandardAnalysisResults &AR, LPMUpdater &U); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; } // end namespace llvm diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index afcca2ab1fa3a..d7510c8991013 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -3996,6 +3996,15 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, return FalseVal; } + // select i1 Cond, i1 true, i1 false --> i1 Cond + assert(Cond->getType()->isIntOrIntVectorTy(1) && + "Select must have bool or bool vector condition"); + assert(TrueVal->getType() == FalseVal->getType() && + "Select must have same types for true/false ops"); + if (Cond->getType() == TrueVal->getType() && + match(TrueVal, m_One()) && match(FalseVal, m_ZeroInt())) + return Cond; + // select ?, X, X -> X if (TrueVal == FalseVal) return TrueVal; diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 21511586ff185..0c35a91f8282e 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -242,6 +242,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("jump-table", MIToken::kw_jump_table) .Case("constant-pool", MIToken::kw_constant_pool) .Case("call-entry", MIToken::kw_call_entry) + .Case("custom", MIToken::kw_custom) .Case("liveout", MIToken::kw_liveout) .Case("address-taken", MIToken::kw_address_taken) .Case("landing-pad", MIToken::kw_landing_pad) diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h index 1e2eba91ceb53..af5327cacfea5 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -110,6 +110,7 @@ struct MIToken { kw_jump_table, kw_constant_pool, kw_call_entry, + kw_custom, kw_liveout, kw_address_taken, kw_landing_pad, diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 525c70016a0fb..0f2648e2bfac5 100644 --- 
a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -28,6 +28,7 @@ #include "llvm/AsmParser/SlotMapping.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MIRFormatter.h" #include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -343,6 +344,37 @@ VRegInfo &PerFunctionMIParsingState::getVRegInfoNamed(StringRef RegName) { return *I.first->second; } +static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST, + DenseMap &Slots2Values) { + int Slot = MST.getLocalSlot(V); + if (Slot == -1) + return; + Slots2Values.insert(std::make_pair(unsigned(Slot), V)); +} + +/// Creates the mapping from slot numbers to function's unnamed IR values. +static void initSlots2Values(const Function &F, + DenseMap &Slots2Values) { + ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false); + MST.incorporateFunction(F); + for (const auto &Arg : F.args()) + mapValueToSlot(&Arg, MST, Slots2Values); + for (const auto &BB : F) { + mapValueToSlot(&BB, MST, Slots2Values); + for (const auto &I : BB) + mapValueToSlot(&I, MST, Slots2Values); + } +} + +const Value* PerFunctionMIParsingState::getIRValue(unsigned Slot) { + if (Slots2Values.empty()) + initSlots2Values(MF.getFunction(), Slots2Values); + auto ValueInfo = Slots2Values.find(Slot); + if (ValueInfo == Slots2Values.end()) + return nullptr; + return ValueInfo->second; +} + namespace { /// A wrapper struct around the 'MachineOperand' struct that includes a source @@ -370,8 +402,6 @@ class MIParser { PerFunctionMIParsingState &PFS; /// Maps from slot numbers to function's unnamed basic blocks. DenseMap Slots2BasicBlocks; - /// Maps from slot numbers to function's unnamed values. - DenseMap Slots2Values; public: MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, @@ -455,9 +485,12 @@ class MIParser { bool parseTargetIndexOperand(MachineOperand &Dest); bool parseCustomRegisterMaskOperand(MachineOperand &Dest); bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest); - bool parseMachineOperand(MachineOperand &Dest, + bool parseMachineOperand(const unsigned OpCode, const unsigned OpIdx, + MachineOperand &Dest, Optional &TiedDefIdx); - bool parseMachineOperandAndTargetFlags(MachineOperand &Dest, + bool parseMachineOperandAndTargetFlags(const unsigned OpCode, + const unsigned OpIdx, + MachineOperand &Dest, Optional &TiedDefIdx); bool parseOffset(int64_t &Offset); bool parseAlignment(unsigned &Alignment); @@ -473,6 +506,9 @@ class MIParser { bool parsePreOrPostInstrSymbol(MCSymbol *&Symbol); bool parseHeapAllocMarker(MDNode *&Node); + bool parseTargetImmMnemonic(const unsigned OpCode, const unsigned OpIdx, + MachineOperand &Dest, const MIRFormatter &MF); + private: /// Convert the integer literal in the current token into an unsigned integer. 
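Most of the MIParser changes that follow repeat one refactoring pattern: member functions that reported through MIParser::error() become free functions taking an error callback, so the same parsing logic can be reused by MIRFormatter outside the parser. A standalone sketch of the pattern (the names here are invented; the real typedef appears in the next hunk):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/Twine.h"

    using namespace llvm;

    using ErrorCallbackType =
        function_ref<bool(StringRef::iterator Loc, const Twine &Msg)>;

    // Free helper: reports through the callback rather than a parser member.
    // By this file's convention, returning true signals an error.
    static bool parseWidget(StringRef Src, ErrorCallbackType ErrCB) {
      if (Src.empty())
        return ErrCB(Src.begin(), "expected a widget");
      return false;
    }

    // A member function then forwards its own error() as the callback, e.g.:
    //   return ::parseWidget(Src, [this](StringRef::iterator Loc,
    //                                    const Twine &Msg) -> bool {
    //     return error(Loc, Msg);
    //   });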
/// @@ -551,6 +587,9 @@ bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) { return true; } +typedef function_ref + ErrorCallbackType; + static const char *toString(MIToken::TokenKind TokenKind) { switch (TokenKind) { case MIToken::comma: @@ -912,7 +951,7 @@ bool MIParser::parse(MachineInstr *&MI) { Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) { auto Loc = Token.location(); Optional TiedDefIdx; - if (parseMachineOperandAndTargetFlags(MO, TiedDefIdx)) + if (parseMachineOperandAndTargetFlags(OpCode, Operands.size(), MO, TiedDefIdx)) return true; if (OpCode == TargetOpcode::DBG_VALUE && MO.isReg()) MO.setIsDebug(); @@ -1493,17 +1532,61 @@ bool MIParser::parseImmediateOperand(MachineOperand &Dest) { return false; } -bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue, - const Constant *&C) { +bool MIParser::parseTargetImmMnemonic(const unsigned OpCode, + const unsigned OpIdx, + MachineOperand &Dest, + const MIRFormatter &MF) { + assert(Token.is(MIToken::dot)); + auto Loc = Token.location(); // record start position + size_t Len = 1; // for "." + lex(); + + // Handle the case where the mnemonic starts with a number. + if (Token.is(MIToken::IntegerLiteral)) { + Len += Token.range().size(); + lex(); + } + + StringRef Src; + if (Token.is(MIToken::comma)) + Src = StringRef(Loc, Len); + else { + assert(Token.is(MIToken::Identifier)); + Src = StringRef(Loc, Len + Token.stringValue().size()); + } + int64_t Val; + if (MF.parseImmMnemonic(OpCode, OpIdx, Src, Val, + [this](StringRef::iterator Loc, const Twine &Msg) + -> bool { return error(Loc, Msg); })) + return true; + + Dest = MachineOperand::CreateImm(Val); + if (!Token.is(MIToken::comma)) + lex(); + return false; +} + +static bool parseIRConstant(StringRef::iterator Loc, StringRef StringValue, + PerFunctionMIParsingState &PFS, const Constant *&C, + ErrorCallbackType ErrCB) { auto Source = StringValue.str(); // The source has to be null terminated. SMDiagnostic Err; - C = parseConstantValue(Source, Err, *MF.getFunction().getParent(), + C = parseConstantValue(Source, Err, *PFS.MF.getFunction().getParent(), &PFS.IRSlots); if (!C) - return error(Loc + Err.getColumnNo(), Err.getMessage()); + return ErrCB(Loc + Err.getColumnNo(), Err.getMessage()); return false; } +bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue, + const Constant *&C) { + return ::parseIRConstant( + Loc, StringValue, PFS, C, + [this](StringRef::iterator Loc, const Twine &Msg) -> bool { + return error(Loc, Msg); + }); +} + bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) { if (parseIRConstant(Loc, StringRef(Loc, Token.range().end() - Loc), C)) return true; @@ -1636,27 +1719,52 @@ bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) { return false; } -bool MIParser::getUnsigned(unsigned &Result) { +static bool getHexUint(const MIToken &Token, APInt &Result) { + assert(Token.is(MIToken::HexLiteral)); + StringRef S = Token.range(); + assert(S[0] == '0' && tolower(S[1]) == 'x'); + // This could be a floating point literal with a special prefix. + if (!isxdigit(S[2])) + return true; + StringRef V = S.substr(2); + APInt A(V.size()*4, V, 16); + + // If A is 0, then A.getActiveBits() is 0. This isn't a valid bitwidth. Make + // sure that isn't the case before constructing the result. + unsigned NumBits = (A == 0) ?
32 : A.getActiveBits(); + Result = APInt(NumBits, ArrayRef(A.getRawData(), A.getNumWords())); + return false; +} + +bool getUnsigned(const MIToken &Token, unsigned &Result, + ErrorCallbackType ErrCB) { if (Token.hasIntegerValue()) { const uint64_t Limit = uint64_t(std::numeric_limits::max()) + 1; uint64_t Val64 = Token.integerValue().getLimitedValue(Limit); if (Val64 == Limit) - return error("expected 32-bit integer (too large)"); + return ErrCB(Token.location(), "expected 32-bit integer (too large)"); Result = Val64; return false; } if (Token.is(MIToken::HexLiteral)) { APInt A; - if (getHexUint(A)) + if (getHexUint(Token, A)) return true; if (A.getBitWidth() > 32) - return error("expected 32-bit integer (too large)"); + return ErrCB(Token.location(), "expected 32-bit integer (too large)"); Result = A.getZExtValue(); return false; } return true; } +bool MIParser::getUnsigned(unsigned &Result) { + return ::getUnsigned( + Token, Result, [this](StringRef::iterator Loc, const Twine &Msg) -> bool { + return error(Loc, Msg); + }); +} + bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) { assert(Token.is(MIToken::MachineBasicBlock) || Token.is(MIToken::MachineBasicBlockLabel)); @@ -1736,23 +1844,25 @@ bool MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) { return false; } -bool MIParser::parseGlobalValue(GlobalValue *&GV) { +static bool parseGlobalValue(const MIToken &Token, + PerFunctionMIParsingState &PFS, GlobalValue *&GV, + ErrorCallbackType ErrCB) { switch (Token.kind()) { case MIToken::NamedGlobalValue: { - const Module *M = MF.getFunction().getParent(); + const Module *M = PFS.MF.getFunction().getParent(); GV = M->getNamedValue(Token.stringValue()); if (!GV) - return error(Twine("use of undefined global value '") + Token.range() + - "'"); + return ErrCB(Token.location(), Twine("use of undefined global value '") + + Token.range() + "'"); break; } case MIToken::GlobalValue: { unsigned GVIdx; - if (getUnsigned(GVIdx)) + if (getUnsigned(Token, GVIdx, ErrCB)) return true; if (GVIdx >= PFS.IRSlots.GlobalValues.size()) - return error(Twine("use of undefined global value '@") + Twine(GVIdx) + - "'"); + return ErrCB(Token.location(), Twine("use of undefined global value '@") + + Twine(GVIdx) + "'"); GV = PFS.IRSlots.GlobalValues[GVIdx]; break; } @@ -1762,6 +1872,14 @@ bool MIParser::parseGlobalValue(GlobalValue *&GV) { return false; } +bool MIParser::parseGlobalValue(GlobalValue *&GV) { + return ::parseGlobalValue( + Token, PFS, GV, + [this](StringRef::iterator Loc, const Twine &Msg) -> bool { + return error(Loc, Msg); + }); +} + bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) { GlobalValue *GV = nullptr; if (parseGlobalValue(GV)) @@ -2410,7 +2528,8 @@ bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) { return false; } -bool MIParser::parseMachineOperand(MachineOperand &Dest, +bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx, + MachineOperand &Dest, Optional &TiedDefIdx) { switch (Token.kind()) { case MIToken::kw_implicit: @@ -2499,6 +2618,12 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, return parseCustomRegisterMaskOperand(Dest); } else return parseTypedImmediateOperand(Dest); + case MIToken::dot: { + if (const auto *Formatter = MF.getTarget().getMIRFormatter()) { + return parseTargetImmMnemonic(OpCode, OpIdx, Dest, *Formatter); + } + LLVM_FALLTHROUGH; + } default: // FIXME: Parse the MCSymbol machine operand. 
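For reference, a target adopting the new MIToken::dot path above would override MIRFormatter roughly as follows. This is a hypothetical target class with invented ".reuse"/".noreuse" mnemonics, and the signature is inferred from the call site in parseTargetImmMnemonic above, so treat it as a sketch rather than the definitive API:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/CodeGen/MIRFormatter.h"

    using namespace llvm;

    struct MyTargetMIRFormatter : public MIRFormatter {
      // Map an operand spelled ".reuse"/".noreuse" in MIR back to its
      // immediate encoding; printImm would perform the inverse mapping.
      bool parseImmMnemonic(const unsigned OpCode, const unsigned OpIdx,
                            StringRef Src, int64_t &Imm,
                            ErrorCallbackType ErrorCallback) const override {
        if (Src == ".reuse")
          Imm = 1;
        else if (Src == ".noreuse")
          Imm = 0;
        else
          return ErrorCallback(Src.begin(),
                               "unknown target immediate mnemonic");
        return false; // false == success, matching the parser convention
      }
    };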
return error("expected a machine operand"); @@ -2507,7 +2632,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, } bool MIParser::parseMachineOperandAndTargetFlags( - MachineOperand &Dest, Optional &TiedDefIdx) { + const unsigned OpCode, const unsigned OpIdx, MachineOperand &Dest, + Optional &TiedDefIdx) { unsigned TF = 0; bool HasTargetFlags = false; if (Token.is(MIToken::kw_target_flags)) { @@ -2539,7 +2665,7 @@ bool MIParser::parseMachineOperandAndTargetFlags( return true; } auto Loc = Token.location(); - if (parseMachineOperand(Dest, TiedDefIdx)) + if (parseMachineOperand(OpCode, OpIdx, Dest, TiedDefIdx)) return true; if (!HasTargetFlags) return false; @@ -2600,30 +2726,31 @@ bool MIParser::parseOperandsOffset(MachineOperand &Op) { return false; } -bool MIParser::parseIRValue(const Value *&V) { +static bool parseIRValue(const MIToken &Token, PerFunctionMIParsingState &PFS, + const Value *&V, ErrorCallbackType ErrCB) { switch (Token.kind()) { case MIToken::NamedIRValue: { - V = MF.getFunction().getValueSymbolTable()->lookup(Token.stringValue()); + V = PFS.MF.getFunction().getValueSymbolTable()->lookup(Token.stringValue()); break; } case MIToken::IRValue: { unsigned SlotNumber = 0; - if (getUnsigned(SlotNumber)) + if (getUnsigned(Token, SlotNumber, ErrCB)) return true; - V = getIRValue(SlotNumber); + V = PFS.getIRValue(SlotNumber); break; } case MIToken::NamedGlobalValue: case MIToken::GlobalValue: { GlobalValue *GV = nullptr; - if (parseGlobalValue(GV)) + if (parseGlobalValue(Token, PFS, GV, ErrCB)) return true; V = GV; break; } case MIToken::QuotedIRValue: { const Constant *C = nullptr; - if (parseIRConstant(Token.location(), Token.stringValue(), C)) + if (parseIRConstant(Token.location(), Token.stringValue(), PFS, C, ErrCB)) return true; V = C; break; @@ -2632,10 +2759,17 @@ bool MIParser::parseIRValue(const Value *&V) { llvm_unreachable("The current token should be an IR block reference"); } if (!V) - return error(Twine("use of undefined IR value '") + Token.range() + "'"); + return ErrCB(Token.location(), Twine("use of undefined IR value '") + Token.range() + "'"); return false; } +bool MIParser::parseIRValue(const Value *&V) { + return ::parseIRValue( + Token, PFS, V, [this](StringRef::iterator Loc, const Twine &Msg) -> bool { + return error(Loc, Msg); + }); +} + bool MIParser::getUint64(uint64_t &Result) { if (Token.hasIntegerValue()) { if (Token.integerValue().getActiveBits() > 64) @@ -2656,20 +2790,7 @@ bool MIParser::getUint64(uint64_t &Result) { } bool MIParser::getHexUint(APInt &Result) { - assert(Token.is(MIToken::HexLiteral)); - StringRef S = Token.range(); - assert(S[0] == '0' && tolower(S[1]) == 'x'); - // This could be a floating point literal with a special prefix. - if (!isxdigit(S[2])) - return true; - StringRef V = S.substr(2); - APInt A(V.size()*4, V, 16); - - // If A is 0, then A.getActiveBits() is 0. This isn't a valid bitwidth. Make - // sure it isn't the case before constructing result. - unsigned NumBits = (A == 0) ? 
32 : A.getActiveBits(); - Result = APInt(NumBits, ArrayRef(A.getRawData(), A.getNumWords())); - return false; + return ::getHexUint(Token, Result); } bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) { @@ -2756,6 +2877,19 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) { "expected a global value or an external symbol after 'call-entry'"); } break; + case MIToken::kw_custom: { + lex(); + if (const auto *Formatter = MF.getTarget().getMIRFormatter()) { + if (Formatter->parseCustomPseudoSourceValue( + Token.stringValue(), MF, PFS, PSV, + [this](StringRef::iterator Loc, const Twine &Msg) -> bool { + return error(Loc, Msg); + })) + return true; + } else + return error("unable to parse target custom pseudo source value"); + break; + } default: llvm_unreachable("The current token should be pseudo source value"); } @@ -2767,7 +2901,7 @@ bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) { if (Token.is(MIToken::kw_constant_pool) || Token.is(MIToken::kw_stack) || Token.is(MIToken::kw_got) || Token.is(MIToken::kw_jump_table) || Token.is(MIToken::FixedStackObject) || Token.is(MIToken::StackObject) || - Token.is(MIToken::kw_call_entry)) { + Token.is(MIToken::kw_call_entry) || Token.is(MIToken::kw_custom)) { const PseudoSourceValue *PSV = nullptr; if (parseMemoryPseudoSourceValue(PSV)) return true; @@ -3018,35 +3152,8 @@ const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) { return getIRBlockFromSlot(Slot, CustomSlots2BasicBlocks); } -static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST, - DenseMap &Slots2Values) { - int Slot = MST.getLocalSlot(V); - if (Slot == -1) - return; - Slots2Values.insert(std::make_pair(unsigned(Slot), V)); -} - -/// Creates the mapping from slot numbers to function's unnamed IR values. 
-static void initSlots2Values(const Function &F, - DenseMap &Slots2Values) { - ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false); - MST.incorporateFunction(F); - for (const auto &Arg : F.args()) - mapValueToSlot(&Arg, MST, Slots2Values); - for (const auto &BB : F) { - mapValueToSlot(&BB, MST, Slots2Values); - for (const auto &I : BB) - mapValueToSlot(&I, MST, Slots2Values); - } -} - const Value *MIParser::getIRValue(unsigned Slot) { - if (Slots2Values.empty()) - initSlots2Values(MF.getFunction(), Slots2Values); - auto ValueInfo = Slots2Values.find(Slot); - if (ValueInfo == Slots2Values.end()) - return nullptr; - return ValueInfo->second; + return PFS.getIRValue(Slot); } MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) { @@ -3111,3 +3218,15 @@ bool llvm::parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src, SMDiagnostic &Error) { return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node); } + +bool MIRFormatter::parseIRValue(StringRef Src, MachineFunction &MF, + PerFunctionMIParsingState &PFS, const Value *&V, + ErrorCallbackType ErrorCallback) { + MIToken Token; + Src = lexMIToken(Src, Token, [&](StringRef::iterator Loc, const Twine &Msg) { + ErrorCallback(Loc, Msg); + }); + V = nullptr; + + return ::parseIRValue(Token, PFS, V, ErrorCallback); +} diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index b06e34a809fca..9d9c12a95918c 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -709,6 +709,7 @@ void MIPrinter::print(const MachineInstr &MI) { const auto *TRI = SubTarget.getRegisterInfo(); assert(TRI && "Expected target register info"); const auto *TII = SubTarget.getInstrInfo(); + const auto *MIRF = MF->getTarget().getMIRFormatter(); assert(TII && "Expected target instruction info"); if (MI.isCFIInstruction()) assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction"); @@ -807,7 +808,7 @@ void MIPrinter::print(const MachineInstr &MI) { for (const auto *Op : MI.memoperands()) { if (NeedComma) OS << ", "; - Op->print(OS, MST, SSNs, Context, &MFI, TII); + Op->print(OS, MST, SSNs, Context, &MFI, TII, MIRF); NeedComma = true; } } @@ -856,7 +857,7 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef()) TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx); const TargetIntrinsicInfo *TII = MI.getMF()->getTarget().getIntrinsicInfo(); - Op.print(OS, MST, TypeToPrint, PrintDef, /*IsStandalone=*/false, + Op.print(OS, MST, TypeToPrint, OpIdx, PrintDef, /*IsStandalone=*/false, ShouldPrintRegisterTies, TiedOperandIdx, TRI, TII); break; } @@ -874,6 +875,28 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, } } +void MIRFormatter::printIRValue(raw_ostream &OS, const Value &V, + ModuleSlotTracker &MST) { + if (isa(V)) { + V.printAsOperand(OS, /*PrintType=*/false, MST); + return; + } + if (isa(V)) { + // Machine memory operands can load/store to/from constant value pointers. + OS << '`'; + V.printAsOperand(OS, /*PrintType=*/true, MST); + OS << '`'; + return; + } + OS << "%ir."; + if (V.hasName()) { + printLLVMNameWithoutPrefix(OS, V.getName()); + return; + } + int Slot = MST.getCurrentFunction() ? 
MST.getLocalSlot(&V) : -1; + MachineOperand::printIRSlotNumber(OS, Slot); +} + void llvm::printMIR(raw_ostream &OS, const Module &M) { yaml::Output Out(OS); Out << const_cast(M); diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 16ae732169a48..177fef80e2e68 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -89,13 +89,15 @@ static void tryToGetTargetInfo(const MachineInstr &MI, const TargetRegisterInfo *&TRI, const MachineRegisterInfo *&MRI, const TargetIntrinsicInfo *&IntrinsicInfo, - const TargetInstrInfo *&TII) { + const TargetInstrInfo *&TII, + const MIRFormatter *&MIRF) { if (const MachineFunction *MF = getMFIfAvailable(MI)) { TRI = MF->getSubtarget().getRegisterInfo(); MRI = &MF->getRegInfo(); IntrinsicInfo = MF->getTarget().getIntrinsicInfo(); TII = MF->getSubtarget().getInstrInfo(); + MIRF = MF->getTarget().getMIRFormatter(); } } @@ -1477,7 +1479,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, const TargetRegisterInfo *TRI = nullptr; const MachineRegisterInfo *MRI = nullptr; const TargetIntrinsicInfo *IntrinsicInfo = nullptr; - tryToGetTargetInfo(*this, TRI, MRI, IntrinsicInfo, TII); + const MIRFormatter *MIRF = nullptr; + tryToGetTargetInfo(*this, TRI, MRI, IntrinsicInfo, TII, MIRF); if (isCFIInstruction()) assert(getNumOperands() == 1 && "Expected 1 operand in CFI instruction"); @@ -1506,7 +1509,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, LLT TypeToPrint = MRI ? getTypeToPrint(StartOp, PrintedTypes, *MRI) : LLT{}; unsigned TiedOperandIdx = getTiedOperandIdx(StartOp); - MO.print(OS, MST, TypeToPrint, /*PrintDef=*/false, IsStandalone, + MO.print(OS, MST, TypeToPrint, StartOp, /*PrintDef=*/false, IsStandalone, ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); ++StartOp; } @@ -1561,7 +1564,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, const unsigned OpIdx = InlineAsm::MIOp_AsmString; LLT TypeToPrint = MRI ? getTypeToPrint(OpIdx, PrintedTypes, *MRI) : LLT{}; unsigned TiedOperandIdx = getTiedOperandIdx(OpIdx); - getOperand(OpIdx).print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, + getOperand(OpIdx).print(OS, MST, TypeToPrint, OpIdx, /*PrintDef=*/true, IsStandalone, ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); @@ -1600,7 +1603,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, else { LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{}; unsigned TiedOperandIdx = getTiedOperandIdx(i); - MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, + MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone, ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); } } else if (isDebugLabel() && MO.isMetadata()) { @@ -1611,7 +1614,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, else { LLT TypeToPrint = MRI ? 
getTypeToPrint(i, PrintedTypes, *MRI) : LLT{}; unsigned TiedOperandIdx = getTiedOperandIdx(i); - MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, + MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone, ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); } } else if (i == AsmDescOp && MO.isImm()) { @@ -1678,7 +1681,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, if (MO.isImm() && isOperandSubregIdx(i)) MachineOperand::printSubRegIdx(OS, MO.getImm(), TRI); else - MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, + MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone, ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); } } @@ -1737,7 +1740,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, for (const MachineMemOperand *Op : memoperands()) { if (NeedComma) OS << ", "; - Op->print(OS, MST, SSNs, *Context, MFI, TII); + Op->print(OS, MST, SSNs, *Context, MFI, TII, MIRF); NeedComma = true; } } diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index 8b19501ec3cf1..5dd98467ba663 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/CodeGen/MIRFormatter.h" #include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -458,28 +459,6 @@ static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB, OS << ""; } -static void printIRValueReference(raw_ostream &OS, const Value &V, - ModuleSlotTracker &MST) { - if (isa(V)) { - V.printAsOperand(OS, /*PrintType=*/false, MST); - return; - } - if (isa(V)) { - // Machine memory operands can load/store to/from constant value pointers. - OS << '`'; - V.printAsOperand(OS, /*PrintType=*/true, MST); - OS << '`'; - return; - } - OS << "%ir."; - if (V.hasName()) { - printLLVMNameWithoutPrefix(OS, V.getName()); - return; - } - int Slot = MST.getCurrentFunction() ? 
MST.getLocalSlot(&V) : -1; - MachineOperand::printIRSlotNumber(OS, Slot); -} - static void printSyncScope(raw_ostream &OS, const LLVMContext &Context, SyncScope::ID SSID, SmallVectorImpl &SSNs) { @@ -734,14 +713,15 @@ void MachineOperand::print(raw_ostream &OS, LLT TypeToPrint, const TargetIntrinsicInfo *IntrinsicInfo) const { tryToGetTargetInfo(*this, TRI, IntrinsicInfo); ModuleSlotTracker DummyMST(nullptr); - print(OS, DummyMST, TypeToPrint, /*PrintDef=*/false, /*IsStandalone=*/true, + print(OS, DummyMST, TypeToPrint, None, /*PrintDef=*/false, + /*IsStandalone=*/true, /*ShouldPrintRegisterTies=*/true, /*TiedOperandIdx=*/0, TRI, IntrinsicInfo); } void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, - LLT TypeToPrint, bool PrintDef, bool IsStandalone, - bool ShouldPrintRegisterTies, + LLT TypeToPrint, Optional OpIdx, bool PrintDef, + bool IsStandalone, bool ShouldPrintRegisterTies, unsigned TiedOperandIdx, const TargetRegisterInfo *TRI, const TargetIntrinsicInfo *IntrinsicInfo) const { @@ -802,9 +782,16 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << '(' << TypeToPrint << ')'; break; } - case MachineOperand::MO_Immediate: - OS << getImm(); + case MachineOperand::MO_Immediate: { + const MIRFormatter *Formatter = nullptr; + if (const MachineFunction *MF = getMFIfAvailable(*this)) + Formatter = MF->getTarget().getMIRFormatter(); + if (Formatter) + Formatter->printImm(OS, *getParent(), OpIdx, getImm()); + else + OS << getImm(); break; + } case MachineOperand::MO_CImmediate: getCImm()->printAsOperand(OS, /*PrintType=*/true, MST); break; @@ -1070,7 +1057,8 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, SmallVectorImpl &SSNs, const LLVMContext &Context, const MachineFrameInfo *MFI, - const TargetInstrInfo *TII) const { + const TargetInstrInfo *TII, + const MIRFormatter* MIRF) const { OS << '('; if (isVolatile()) OS << "volatile "; @@ -1111,7 +1099,7 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, if (const Value *Val = getValue()) { OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into "); - printIRValueReference(OS, *Val, MST); + MIRFormatter::printIRValue(OS, *Val, MST); } else if (const PseudoSourceValue *PVal = getPseudoValue()) { OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into "); assert(PVal && "Expected a pseudo source value"); @@ -1144,15 +1132,20 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, printLLVMNameWithoutPrefix( OS, cast(PVal)->getSymbol()); break; - default: + default: { // FIXME: This is not necessarily the correct MIR serialization format for // a custom pseudo source value, but at least it allows // -print-machineinstrs to work on a target with custom pseudo source // values. - OS << "custom "; - PVal->printCustom(OS); + OS << "custom \""; + if (MIRF) + MIRF->printCustomPseudoSourceValue(OS, MST, *PVal); + else + PVal->printCustom(OS); + OS << '\"'; break; } + } } MachineOperand::printOperandOffset(OS, getOffset()); if (getBaseAlignment() != getSize()) diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index ca57e51268e88..b2534c2e53d4a 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -2312,6 +2312,32 @@ void MachineVerifier::visitMachineFunctionAfter() { if (LiveInts) verifyLiveIntervals(); + // Check live-in list of each MBB. If a register is live into MBB, check + // that the register is in regsLiveOut of each predecessor block. 
Since + // this must come from a definition in the predecessor or its live-in + // list, this will catch a live-through case where the predecessor does not + // have the register in its live-in list. This currently only checks + // registers that have no aliases, are not allocatable and are not + // reserved, which could mean a condition code register for instance. + if (MRI->tracksLiveness()) + for (const auto &MBB : *MF) + for (MachineBasicBlock::RegisterMaskPair P : MBB.liveins()) { + MCPhysReg LiveInReg = P.PhysReg; + bool hasAliases = MCRegAliasIterator(LiveInReg, TRI, false).isValid(); + if (hasAliases || isAllocatable(LiveInReg) || isReserved(LiveInReg)) + continue; + for (const MachineBasicBlock *Pred : MBB.predecessors()) { + BBInfo &PInfo = MBBInfoMap[Pred]; + if (!PInfo.regsLiveOut.count(LiveInReg)) { + report("Live in register not found to be live out from predecessor.", + &MBB); + errs() << TRI->getName(LiveInReg) + << " not found to be live out from " + << printMBBReference(*Pred) << "\n"; + } + } + } + for (auto CSInfo : MF->getCallSitesInfo()) if (!CSInfo.first->isCall()) report("Call site info referencing instruction that is not call", MF); diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index d583643ac68f7..3909b57172814 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -259,10 +259,6 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { for (auto &I : EntryDbgValues) I.first->insert(I.first->begin(), I.second.begin(), I.second.end()); - // Allow the target machine to make final modifications to the function - // before the frame layout is finalized. - TFI->processFunctionBeforeFrameIndicesReplaced(MF, RS); - // Replace all MO_FrameIndex operands with physical register references // and actual offsets. // diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cfc4671eaa0e4..6030c95742015 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12639,6 +12639,15 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } } + // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z)) + // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z)) + if (!TLI.isFNegFree(VT) && + TLI.isNegatibleForFree(SDValue(N, 0), DAG, LegalOperations, + ForCodeSize) == 2) + return DAG.getNode(ISD::FNEG, DL, VT, + TLI.getNegatedExpression(SDValue(N, 0), DAG, + LegalOperations, ForCodeSize), + Flags); return SDValue(); } @@ -18585,8 +18594,22 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { if (ConcatSrcNumElts == ExtNumElts) return V.getOperand(ConcatOpIdx); - // TODO: Handle the case where the concat operands are larger than the - // result of this extract by extracting directly from a concat op. + // If the concatenated source vectors are a whole multiple of the length of + // this extract, then extract a fraction of one of those source vectors + // directly from a concat operand.
Example: + // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 --> + // v2i8 extract_subvec v8i8 Y, 6 + if (ConcatSrcNumElts % ExtNumElts == 0) { + SDLoc DL(N); + unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts; + assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts && + "Trying to extract from >1 concat operand?"); + assert(NewExtIdx % ExtNumElts == 0 && + "Extract index is not a multiple of the input vector length."); + SDValue NewIndexC = DAG.getIntPtrConstant(NewExtIdx, DL); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, + V.getOperand(ConcatOpIdx), NewIndexC); + } } V = peekThroughBitcasts(V); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 6fd71393bf38c..cbdcb93e60c90 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -480,7 +480,8 @@ static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, if (MF) MST.incorporateFunction(MF->getFunction()); SmallVector SSNs; - MMO.print(OS, MST, SSNs, Ctx, MFI, TII); + MMO.print(OS, MST, SSNs, Ctx, MFI, TII, + MF ? MF->getTarget().getMIRFormatter() : nullptr); } static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp index 8aed9ab653a16..0c5f9a9c54ec6 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -79,9 +79,9 @@ DWARFLocationInterpreter::Interpret(const DWARFLocationEntry &E) { } case dwarf::DW_LLE_offset_pair: { if (!Base) { - return createStringError( - inconvertibleErrorCode(), - "Unable to resolve DW_LLE_offset_pair: base address unknown"); + return createStringError(inconvertibleErrorCode(), + "Unable to resolve location list offset pair: " + "Base address not defined"); } DWARFAddressRange Range{Base->Address + E.Value0, Base->Address + E.Value1, Base->SectionIndex}; diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index 63b98f26ba193..04e34a90a9bcf 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -147,9 +147,9 @@ LLVMAttributeRef LLVMCreateEnumAttribute(LLVMContextRef C, unsigned KindID, // After r362128, byval attributes need to have a type attribute. Provide a // NULL one until a proper API is added for this.
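A standalone arithmetic check of the EXTRACT_SUBVECTOR fold above, using the numbers from its own comment (plain C++, no DAG types involved):

    #include <cassert>

    int main() {
      // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14
      unsigned ExtNumElts = 2, ConcatSrcNumElts = 8, ExtIdx = 14;
      unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;             // 1, i.e. Y
      unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts; // 14 - 8 = 6
      assert(ConcatSrcNumElts % ExtNumElts == 0);          // the fold's guard
      assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts);  // first assert above
      assert(NewExtIdx % ExtNumElts == 0);                 // second assert above
      // Result: v2i8 extract_subvec v8i8 Y, 6
      return NewExtIdx == 6 ? 0 : 1;
    }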
return wrap(Attribute::getWithByValType(Ctx, NULL)); - } else { - return wrap(Attribute::get(Ctx, AttrKind, Val)); } + + return wrap(Attribute::get(Ctx, AttrKind, Val)); } unsigned LLVMGetEnumAttributeKind(LLVMAttributeRef A) { diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 50648ba17945b..d232946af2942 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4779,7 +4779,7 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { case Intrinsic::experimental_constrained_fcmp: case Intrinsic::experimental_constrained_fcmps: { - auto Pred = dyn_cast(&FPI)->getPredicate(); + auto Pred = cast(&FPI)->getPredicate(); Assert(CmpInst::isFPPredicate(Pred), "invalid predicate for constrained FP comparison intrinsic", &FPI); break; diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 0c4eb953aa4e6..dc8132b627a66 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -3130,7 +3130,9 @@ bool AsmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) { Value = APFloat::getNaN(Semantics, false, ~0); else return TokError("invalid floating point literal"); - } else if (!Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven)) + } else if (errorToBool( + Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven) + .takeError())) return TokError("invalid floating point literal"); if (IsNeg) Value.changeSign(); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 64d748f94f144..646eb7d26cbdd 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -970,8 +970,7 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline( // across the loop nests. // We do UnrollAndJam in a separate LPM to ensure it happens before unroll if (EnableUnrollAndJam && PTO.LoopUnrolling) { - OptimizePM.addPass( - createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level))); + OptimizePM.addPass(LoopUnrollAndJamPass(Level)); } OptimizePM.addPass(LoopUnrollPass( LoopUnrollOptions(Level, /*OnlyWhenForced=*/!PTO.LoopUnrolling, diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 66b38872b386e..3efb57cd35890 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -235,6 +235,7 @@ FUNCTION_PASS("spec-phis", SpeculateAroundPHIsPass()) FUNCTION_PASS("sroa", SROA()) FUNCTION_PASS("tailcallelim", TailCallElimPass()) FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass()) +FUNCTION_PASS("unroll-and-jam", LoopUnrollAndJamPass()) FUNCTION_PASS("verify", VerifierPass()) FUNCTION_PASS("verify", DominatorTreeVerifierPass()) FUNCTION_PASS("verify", LoopVerifierPass()) @@ -307,7 +308,6 @@ LOOP_PASS("simplify-cfg", LoopSimplifyCFGPass()) LOOP_PASS("strength-reduce", LoopStrengthReducePass()) LOOP_PASS("indvars", IndVarSimplifyPass()) LOOP_PASS("irce", IRCEPass()) -LOOP_PASS("unroll-and-jam", LoopUnrollAndJamPass()) LOOP_PASS("unroll-full", LoopFullUnrollPass()) LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) LOOP_PASS("print", DDGAnalysisPrinterPass(dbgs())) diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index f8a217d3535de..050c37baefb87 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -4518,9 +4518,8 @@ hash_code hash_value(const APFloat &Arg) { APFloat::APFloat(const fltSemantics &Semantics, StringRef S) : APFloat(Semantics) { auto StatusOrErr = convertFromString(S, 
rmNearestTiesToEven); - if (!StatusOrErr) { - assert(false && "Invalid floating point representation"); - } + assert(StatusOrErr && "Invalid floating point representation"); + consumeError(StatusOrErr.takeError()); } APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, diff --git a/llvm/lib/Support/ARMTargetParser.cpp b/llvm/lib/Support/ARMTargetParser.cpp index ce5daa7fe58c0..f2c22fd93c8b8 100644 --- a/llvm/lib/Support/ARMTargetParser.cpp +++ b/llvm/lib/Support/ARMTargetParser.cpp @@ -174,8 +174,6 @@ bool ARM::getFPUFeatures(unsigned FPUKind, std::vector &Features) { // under FPURestriction::None, which is the only FPURestriction in // which they would be valid (since FPURestriction::SP doesn't // exist). - - {"+fpregs", "-fpregs", FPUVersion::VFPV2, FPURestriction::SP_D16}, {"+vfp2", "-vfp2", FPUVersion::VFPV2, FPURestriction::D16}, {"+vfp2sp", "-vfp2sp", FPUVersion::VFPV2, FPURestriction::SP_D16}, {"+vfp3", "-vfp3", FPUVersion::VFPV3, FPURestriction::None}, diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp index b5db172cc1a3a..104482de4ad70 100644 --- a/llvm/lib/Support/StringRef.cpp +++ b/llvm/lib/Support/StringRef.cpp @@ -588,13 +588,11 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { bool StringRef::getAsDouble(double &Result, bool AllowInexact) const { APFloat F(0.0); - auto ErrOrStatus = F.convertFromString(*this, APFloat::rmNearestTiesToEven); - if (!ErrOrStatus) { - assert(false && "Invalid floating point representation"); + auto StatusOrErr = F.convertFromString(*this, APFloat::rmNearestTiesToEven); + if (errorToBool(StatusOrErr.takeError())) return true; - } - APFloat::opStatus Status = *ErrOrStatus; + APFloat::opStatus Status = *StatusOrErr; if (Status != APFloat::opOK) { if (!AllowInexact || !(Status & APFloat::opInexact)) return true; diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp index 618670553d091..773d6f05a9e5d 100644 --- a/llvm/lib/Support/Triple.cpp +++ b/llvm/lib/Support/Triple.cpp @@ -21,58 +21,59 @@ StringRef Triple::getArchTypeName(ArchType Kind) { case UnknownArch: return "unknown"; case aarch64: return "aarch64"; - case aarch64_be: return "aarch64_be"; case aarch64_32: return "aarch64_32"; + case aarch64_be: return "aarch64_be"; + case amdgcn: return "amdgcn"; + case amdil64: return "amdil64"; + case amdil: return "amdil"; + case arc: return "arc"; case arm: return "arm"; case armeb: return "armeb"; - case arc: return "arc"; case avr: return "avr"; - case bpfel: return "bpfel"; case bpfeb: return "bpfeb"; + case bpfel: return "bpfel"; + case fpga_aoco: return "fpga_aoco"; + case fpga_aocr: return "fpga_aocr"; + case fpga_aocx: return "fpga_aocx"; case hexagon: return "hexagon"; - case mips: return "mips"; - case mipsel: return "mipsel"; + case hsail64: return "hsail64"; + case hsail: return "hsail"; + case kalimba: return "kalimba"; + case lanai: return "lanai"; + case le32: return "le32"; + case le64: return "le64"; case mips64: return "mips64"; case mips64el: return "mips64el"; + case mips: return "mips"; + case mipsel: return "mipsel"; case msp430: return "msp430"; + case nvptx64: return "nvptx64"; + case nvptx: return "nvptx"; case ppc64: return "powerpc64"; case ppc64le: return "powerpc64le"; case ppc: return "powerpc"; case r600: return "r600"; - case amdgcn: return "amdgcn"; + case renderscript32: return "renderscript32"; + case renderscript64: return "renderscript64"; case riscv32: return "riscv32"; case riscv64: return "riscv64"; + case shave: return "shave"; case 
sparc: return "sparc"; - case sparcv9: return "sparcv9"; case sparcel: return "sparcel"; + case sparcv9: return "sparcv9"; + case spir64: return "spir64"; + case spir: return "spir"; case systemz: return "s390x"; case tce: return "tce"; case tcele: return "tcele"; case thumb: return "thumb"; case thumbeb: return "thumbeb"; + case ve: return "ve"; + case wasm32: return "wasm32"; + case wasm64: return "wasm64"; case x86: return "i386"; case x86_64: return "x86_64"; case xcore: return "xcore"; - case nvptx: return "nvptx"; - case nvptx64: return "nvptx64"; - case le32: return "le32"; - case le64: return "le64"; - case amdil: return "amdil"; - case amdil64: return "amdil64"; - case hsail: return "hsail"; - case hsail64: return "hsail64"; - case spir: return "spir"; - case spir64: return "spir64"; - case kalimba: return "kalimba"; - case lanai: return "lanai"; - case shave: return "shave"; - case wasm32: return "wasm32"; - case wasm64: return "wasm64"; - case renderscript32: return "renderscript32"; - case renderscript64: return "renderscript64"; - case fpga_aoco: return "fpga_aoco"; - case fpga_aocr: return "fpga_aocr"; - case fpga_aocx: return "fpga_aocx"; } llvm_unreachable("Invalid ArchType!"); @@ -151,6 +152,8 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) { case fpga_aoco: case fpga_aocr: case fpga_aocx: return "fpga"; + + case ve: return "ve"; } } @@ -158,23 +161,23 @@ StringRef Triple::getVendorTypeName(VendorType Kind) { switch (Kind) { case UnknownVendor: return "unknown"; + case AMD: return "amd"; case Apple: return "apple"; - case PC: return "pc"; - case SCEI: return "scei"; case BGP: return "bgp"; case BGQ: return "bgq"; + case CSR: return "csr"; case Freescale: return "fsl"; case IBM: return "ibm"; case ImaginationTechnologies: return "img"; case Intel: return "intel"; + case Mesa: return "mesa"; case MipsTechnologies: return "mti"; - case NVIDIA: return "nvidia"; - case CSR: return "csr"; case Myriad: return "myriad"; - case AMD: return "amd"; - case Mesa: return "mesa"; - case SUSE: return "suse"; + case NVIDIA: return "nvidia"; case OpenEmbedded: return "oe"; + case PC: return "pc"; + case SCEI: return "scei"; + case SUSE: return "suse"; } llvm_unreachable("Invalid VendorType!"); @@ -184,41 +187,41 @@ StringRef Triple::getOSTypeName(OSType Kind) { switch (Kind) { case UnknownOS: return "unknown"; + case AIX: return "aix"; + case AMDHSA: return "amdhsa"; + case AMDPAL: return "amdpal"; case Ananas: return "ananas"; + case CNK: return "cnk"; + case CUDA: return "cuda"; case CloudABI: return "cloudabi"; + case Contiki: return "contiki"; case Darwin: return "darwin"; case DragonFly: return "dragonfly"; + case ELFIAMCU: return "elfiamcu"; + case Emscripten: return "emscripten"; case FreeBSD: return "freebsd"; case Fuchsia: return "fuchsia"; + case Haiku: return "haiku"; + case HermitCore: return "hermit"; + case Hurd: return "hurd"; case IOS: return "ios"; case KFreeBSD: return "kfreebsd"; case Linux: return "linux"; case Lv2: return "lv2"; case MacOSX: return "macosx"; - case NetBSD: return "netbsd"; - case OpenBSD: return "openbsd"; - case Solaris: return "solaris"; - case Win32: return "windows"; - case Haiku: return "haiku"; + case Mesa3D: return "mesa3d"; case Minix: return "minix"; - case RTEMS: return "rtems"; - case NaCl: return "nacl"; - case CNK: return "cnk"; - case AIX: return "aix"; - case CUDA: return "cuda"; case NVCL: return "nvcl"; - case AMDHSA: return "amdhsa"; + case NaCl: return "nacl"; + case NetBSD: return "netbsd"; + case OpenBSD: return "openbsd"; 
case PS4: return "ps4"; - case ELFIAMCU: return "elfiamcu"; + case RTEMS: return "rtems"; + case Solaris: return "solaris"; case TvOS: return "tvos"; - case WatchOS: return "watchos"; - case Mesa3D: return "mesa3d"; - case Contiki: return "contiki"; - case AMDPAL: return "amdpal"; - case HermitCore: return "hermit"; - case Hurd: return "hurd"; case WASI: return "wasi"; - case Emscripten: return "emscripten"; + case WatchOS: return "watchos"; + case Win32: return "windows"; } llvm_unreachable("Invalid OSType"); @@ -227,26 +230,26 @@ StringRef Triple::getOSTypeName(OSType Kind) { StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) { switch (Kind) { case UnknownEnvironment: return "unknown"; + case Android: return "android"; + case CODE16: return "code16"; + case CoreCLR: return "coreclr"; + case Cygnus: return "cygnus"; + case EABI: return "eabi"; + case EABIHF: return "eabihf"; case GNU: return "gnu"; - case GNUABIN32: return "gnuabin32"; case GNUABI64: return "gnuabi64"; - case GNUEABIHF: return "gnueabihf"; + case GNUABIN32: return "gnuabin32"; case GNUEABI: return "gnueabi"; + case GNUEABIHF: return "gnueabihf"; case GNUX32: return "gnux32"; - case CODE16: return "code16"; - case EABI: return "eabi"; - case EABIHF: return "eabihf"; - case Android: return "android"; + case Itanium: return "itanium"; + case MSVC: return "msvc"; + case MacABI: return "macabi"; case Musl: return "musl"; case MuslEABI: return "musleabi"; case MuslEABIHF: return "musleabihf"; - case MSVC: return "msvc"; - case Itanium: return "itanium"; - case Cygnus: return "cygnus"; - case CoreCLR: return "coreclr"; - case Simulator: return "simulator"; case SYCLDevice: return "sycldevice"; - case MacABI: return "macabi"; + case Simulator: return "simulator"; } llvm_unreachable("Invalid EnvironmentType!"); @@ -325,6 +328,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("fpga_aoco", fpga_aoco) .Case("fpga_aocr", fpga_aocr) .Case("fpga_aocx", fpga_aocx) + .Case("ve", ve) .Default(UnknownArch); } @@ -399,7 +403,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { // FIXME: Do we need to support these? .Cases("i786", "i886", "i986", Triple::x86) .Cases("amd64", "x86_64", "x86_64h", Triple::x86_64) - .Cases("powerpc", "ppc", "ppc32", Triple::ppc) + .Cases("powerpc", "powerpcspe", "ppc", "ppc32", Triple::ppc) .Cases("powerpc64", "ppu", "ppc64", Triple::ppc64) .Cases("powerpc64le", "ppc64le", Triple::ppc64le) .Case("xscale", Triple::arm) @@ -448,14 +452,15 @@ static Triple::ArchType parseArch(StringRef ArchName) { .StartsWith("spir", Triple::spir) .StartsWith("kalimba", Triple::kalimba) .Case("lanai", Triple::lanai) - .Case("shave", Triple::shave) - .Case("wasm32", Triple::wasm32) - .Case("wasm64", Triple::wasm64) .Case("renderscript32", Triple::renderscript32) .Case("renderscript64", Triple::renderscript64) .Case("fpga_aoco", Triple::fpga_aoco) .Case("fpga_aocr", Triple::fpga_aocr) .Case("fpga_aocx", Triple::fpga_aocx) + .Case("shave", Triple::shave) + .Case("ve", Triple::ve) + .Case("wasm32", Triple::wasm32) + .Case("wasm64", Triple::wasm64) .Default(Triple::UnknownArch); // Some architectures require special parsing logic just to compute the @@ -588,6 +593,9 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) { } } + if (SubArchName == "powerpcspe") + return Triple::PPCSubArch_spe; + StringRef ARMSubArch = ARM::getCanonicalArchName(SubArchName); // For now, this is the small part. Early return. 
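Assuming the powerpcspe changes above (the new parseArch .Cases entry plus the parseSubArch early check), the observable effect can be sketched as follows; the triple spelling is taken from the .Cases line, and PPCSubArch_spe is assumed to be declared in Triple.h by this patch:

    #include "llvm/ADT/Triple.h"
    #include <cassert>

    using namespace llvm;

    int main() {
      // "powerpcspe" now parses as 32-bit ppc carrying the SPE sub-arch tag.
      Triple T("powerpcspe-unknown-linux-gnu");
      assert(T.getArch() == Triple::ppc);
      assert(T.getSubArch() == Triple::PPCSubArch_spe);
      return 0;
    }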
@@ -662,10 +670,10 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) { static StringRef getObjectFormatTypeName(Triple::ObjectFormatType Kind) { switch (Kind) { case Triple::UnknownObjectFormat: return ""; - case Triple::COFF: return "coff"; - case Triple::ELF: return "elf"; + case Triple::COFF: return "coff"; + case Triple::ELF: return "elf"; case Triple::MachO: return "macho"; - case Triple::Wasm: return "wasm"; + case Triple::Wasm: return "wasm"; case Triple::XCOFF: return "xcoff"; } llvm_unreachable("unknown object format type"); @@ -687,28 +695,31 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { return Triple::ELF; case Triple::aarch64_be: - case Triple::arc: case Triple::amdgcn: - case Triple::amdil: case Triple::amdil64: + case Triple::amdil: + case Triple::arc: case Triple::armeb: case Triple::avr: case Triple::bpfeb: case Triple::bpfel: + case Triple::fpga_aoco: + case Triple::fpga_aocr: + case Triple::fpga_aocx: case Triple::hexagon: - case Triple::lanai: - case Triple::hsail: case Triple::hsail64: + case Triple::hsail: case Triple::kalimba: + case Triple::lanai: case Triple::le32: case Triple::le64: - case Triple::mips: case Triple::mips64: case Triple::mips64el: + case Triple::mips: case Triple::mipsel: case Triple::msp430: - case Triple::nvptx: case Triple::nvptx64: + case Triple::nvptx: case Triple::ppc64le: case Triple::r600: case Triple::renderscript32: @@ -719,20 +730,18 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { case Triple::sparc: case Triple::sparcel: case Triple::sparcv9: - case Triple::spir: case Triple::spir64: + case Triple::spir: case Triple::systemz: case Triple::tce: case Triple::tcele: case Triple::thumbeb: + case Triple::ve: case Triple::xcore: - case Triple::fpga_aoco: - case Triple::fpga_aocr: - case Triple::fpga_aocx: return Triple::ELF; - case Triple::ppc: case Triple::ppc64: + case Triple::ppc: if (T.isOSDarwin()) return Triple::MachO; else if (T.isOSAIX()) @@ -1263,58 +1272,59 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { return 16; case llvm::Triple::aarch64_32: + case llvm::Triple::amdil: case llvm::Triple::arc: case llvm::Triple::arm: case llvm::Triple::armeb: + case llvm::Triple::fpga_aoco: + case llvm::Triple::fpga_aocr: + case llvm::Triple::fpga_aocx: case llvm::Triple::hexagon: + case llvm::Triple::hsail: + case llvm::Triple::kalimba: + case llvm::Triple::lanai: case llvm::Triple::le32: case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::nvptx: case llvm::Triple::ppc: case llvm::Triple::r600: + case llvm::Triple::renderscript32: case llvm::Triple::riscv32: + case llvm::Triple::shave: case llvm::Triple::sparc: case llvm::Triple::sparcel: + case llvm::Triple::spir: case llvm::Triple::tce: case llvm::Triple::tcele: case llvm::Triple::thumb: case llvm::Triple::thumbeb: + case llvm::Triple::wasm32: case llvm::Triple::x86: case llvm::Triple::xcore: - case llvm::Triple::amdil: - case llvm::Triple::hsail: - case llvm::Triple::spir: - case llvm::Triple::kalimba: - case llvm::Triple::lanai: - case llvm::Triple::shave: - case llvm::Triple::wasm32: - case llvm::Triple::renderscript32: - case llvm::Triple::fpga_aoco: - case llvm::Triple::fpga_aocr: - case llvm::Triple::fpga_aocx: return 32; case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: case llvm::Triple::amdgcn: - case llvm::Triple::bpfel: + case llvm::Triple::amdil64: case llvm::Triple::bpfeb: + case llvm::Triple::bpfel: + case llvm::Triple::hsail64: case llvm::Triple::le64: case 
llvm::Triple::mips64: case llvm::Triple::mips64el: case llvm::Triple::nvptx64: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: + case llvm::Triple::renderscript64: case llvm::Triple::riscv64: case llvm::Triple::sparcv9: - case llvm::Triple::systemz: - case llvm::Triple::x86_64: - case llvm::Triple::amdil64: - case llvm::Triple::hsail64: case llvm::Triple::spir64: + case llvm::Triple::systemz: + case llvm::Triple::ve: case llvm::Triple::wasm64: - case llvm::Triple::renderscript64: + case llvm::Triple::x86_64: return 64; } llvm_unreachable("Invalid architecture value"); @@ -1338,63 +1348,64 @@ Triple Triple::get32BitArchVariant() const { case Triple::UnknownArch: case Triple::amdgcn: case Triple::avr: - case Triple::bpfel: case Triple::bpfeb: + case Triple::bpfel: case Triple::msp430: - case Triple::systemz: case Triple::ppc64le: + case Triple::systemz: + case Triple::ve: T.setArch(UnknownArch); break; case Triple::aarch64_32: case Triple::amdil: - case Triple::hsail: - case Triple::spir: case Triple::arc: case Triple::arm: case Triple::armeb: + case Triple::fpga_aoco: + case Triple::fpga_aocr: + case Triple::fpga_aocx: case Triple::hexagon: + case Triple::hsail: case Triple::kalimba: + case Triple::lanai: case Triple::le32: case Triple::mips: case Triple::mipsel: case Triple::nvptx: case Triple::ppc: case Triple::r600: + case Triple::renderscript32: case Triple::riscv32: + case Triple::shave: case Triple::sparc: case Triple::sparcel: + case Triple::spir: case Triple::tce: case Triple::tcele: case Triple::thumb: case Triple::thumbeb: + case Triple::wasm32: case Triple::x86: case Triple::xcore: - case Triple::lanai: - case Triple::shave: - case Triple::wasm32: - case Triple::renderscript32: - case Triple::fpga_aoco: - case Triple::fpga_aocr: - case Triple::fpga_aocx: // Already 32-bit. 
break; case Triple::aarch64: T.setArch(Triple::arm); break; case Triple::aarch64_be: T.setArch(Triple::armeb); break; + case Triple::amdil64: T.setArch(Triple::amdil); break; + case Triple::hsail64: T.setArch(Triple::hsail); break; case Triple::le64: T.setArch(Triple::le32); break; case Triple::mips64: T.setArch(Triple::mips); break; case Triple::mips64el: T.setArch(Triple::mipsel); break; case Triple::nvptx64: T.setArch(Triple::nvptx); break; case Triple::ppc64: T.setArch(Triple::ppc); break; - case Triple::sparcv9: T.setArch(Triple::sparc); break; + case Triple::renderscript64: T.setArch(Triple::renderscript32); break; case Triple::riscv64: T.setArch(Triple::riscv32); break; - case Triple::x86_64: T.setArch(Triple::x86); break; - case Triple::amdil64: T.setArch(Triple::amdil); break; - case Triple::hsail64: T.setArch(Triple::hsail); break; + case Triple::sparcv9: T.setArch(Triple::sparc); break; case Triple::spir64: T.setArch(Triple::spir); break; case Triple::wasm64: T.setArch(Triple::wasm32); break; - case Triple::renderscript64: T.setArch(Triple::renderscript32); break; + case Triple::x86_64: T.setArch(Triple::x86); break; } return T; } @@ -1405,63 +1416,64 @@ Triple Triple::get64BitArchVariant() const { case Triple::UnknownArch: case Triple::arc: case Triple::avr: + case Triple::fpga_aoco: + case Triple::fpga_aocr: + case Triple::fpga_aocx: case Triple::hexagon: case Triple::kalimba: case Triple::lanai: case Triple::msp430: case Triple::r600: + case Triple::shave: + case Triple::sparcel: case Triple::tce: case Triple::tcele: case Triple::xcore: - case Triple::sparcel: - case Triple::shave: - case Triple::fpga_aoco: - case Triple::fpga_aocr: - case Triple::fpga_aocx: T.setArch(UnknownArch); break; case Triple::aarch64: case Triple::aarch64_be: - case Triple::bpfel: - case Triple::bpfeb: - case Triple::le64: - case Triple::amdil64: case Triple::amdgcn: + case Triple::amdil64: + case Triple::bpfeb: + case Triple::bpfel: case Triple::hsail64: - case Triple::spir64: + case Triple::le64: case Triple::mips64: case Triple::mips64el: case Triple::nvptx64: case Triple::ppc64: case Triple::ppc64le: + case Triple::renderscript64: case Triple::riscv64: case Triple::sparcv9: + case Triple::spir64: case Triple::systemz: - case Triple::x86_64: + case Triple::ve: case Triple::wasm64: - case Triple::renderscript64: + case Triple::x86_64: // Already 64-bit. 
break; case Triple::aarch64_32: T.setArch(Triple::aarch64); break; + case Triple::amdil: T.setArch(Triple::amdil64); break; case Triple::arm: T.setArch(Triple::aarch64); break; case Triple::armeb: T.setArch(Triple::aarch64_be); break; + case Triple::hsail: T.setArch(Triple::hsail64); break; case Triple::le32: T.setArch(Triple::le64); break; case Triple::mips: T.setArch(Triple::mips64); break; case Triple::mipsel: T.setArch(Triple::mips64el); break; case Triple::nvptx: T.setArch(Triple::nvptx64); break; case Triple::ppc: T.setArch(Triple::ppc64); break; - case Triple::sparc: T.setArch(Triple::sparcv9); break; + case Triple::renderscript32: T.setArch(Triple::renderscript64); break; case Triple::riscv32: T.setArch(Triple::riscv64); break; - case Triple::x86: T.setArch(Triple::x86_64); break; - case Triple::amdil: T.setArch(Triple::amdil64); break; - case Triple::hsail: T.setArch(Triple::hsail64); break; + case Triple::sparc: T.setArch(Triple::sparcv9); break; case Triple::spir: T.setArch(Triple::spir64); break; case Triple::thumb: T.setArch(Triple::aarch64); break; case Triple::thumbeb: T.setArch(Triple::aarch64_be); break; case Triple::wasm32: T.setArch(Triple::wasm64); break; - case Triple::renderscript32: T.setArch(Triple::renderscript64); break; + case Triple::x86: T.setArch(Triple::x86_64); break; } return T; } @@ -1487,6 +1499,8 @@ Triple Triple::getBigEndianArchVariant() const { case Triple::nvptx64: case Triple::nvptx: case Triple::r600: + case Triple::renderscript32: + case Triple::renderscript64: case Triple::riscv32: case Triple::riscv64: case Triple::shave: @@ -1497,8 +1511,7 @@ Triple Triple::getBigEndianArchVariant() const { case Triple::x86: case Triple::x86_64: case Triple::xcore: - case Triple::renderscript32: - case Triple::renderscript64: + case Triple::ve: // ARM is intentionally unsupported here, changing the architecture would // drop any arch suffixes. 
@@ -1507,13 +1520,13 @@ Triple Triple::getBigEndianArchVariant() const { T.setArch(UnknownArch); break; - case Triple::tcele: T.setArch(Triple::tce); break; case Triple::aarch64: T.setArch(Triple::aarch64_be); break; case Triple::bpfel: T.setArch(Triple::bpfeb); break; case Triple::mips64el:T.setArch(Triple::mips64); break; case Triple::mipsel: T.setArch(Triple::mips); break; case Triple::ppc64le: T.setArch(Triple::ppc64); break; case Triple::sparcel: T.setArch(Triple::sparc); break; + case Triple::tcele: T.setArch(Triple::tce); break; default: llvm_unreachable("getBigEndianArchVariant: unknown triple."); } @@ -1539,13 +1552,13 @@ Triple Triple::getLittleEndianArchVariant() const { T.setArch(UnknownArch); break; - case Triple::tce: T.setArch(Triple::tcele); break; case Triple::aarch64_be: T.setArch(Triple::aarch64); break; case Triple::bpfeb: T.setArch(Triple::bpfel); break; case Triple::mips64: T.setArch(Triple::mips64el); break; case Triple::mips: T.setArch(Triple::mipsel); break; case Triple::ppc64: T.setArch(Triple::ppc64le); break; case Triple::sparc: T.setArch(Triple::sparcel); break; + case Triple::tce: T.setArch(Triple::tcele); break; default: llvm_unreachable("getLittleEndianArchVariant: unknown triple."); } @@ -1575,21 +1588,22 @@ bool Triple::isLittleEndian() const { case Triple::nvptx: case Triple::ppc64le: case Triple::r600: + case Triple::renderscript32: + case Triple::renderscript64: case Triple::riscv32: case Triple::riscv64: case Triple::shave: case Triple::sparcel: case Triple::spir64: case Triple::spir: + case Triple::tcele: case Triple::thumb: + case Triple::ve: case Triple::wasm32: case Triple::wasm64: case Triple::x86: case Triple::x86_64: case Triple::xcore: - case Triple::tcele: - case Triple::renderscript32: - case Triple::renderscript64: return true; default: return false; @@ -1644,10 +1658,10 @@ StringRef Triple::getARMCPUForArch(StringRef MArch) const { case llvm::Triple::Win32: // FIXME: this is invalid for WindowsCE return "cortex-a9"; - case llvm::Triple::MacOSX: case llvm::Triple::IOS: - case llvm::Triple::WatchOS: + case llvm::Triple::MacOSX: case llvm::Triple::TvOS: + case llvm::Triple::WatchOS: if (MArch == "v7k") return "cortex-a7"; break; @@ -1667,10 +1681,10 @@ StringRef Triple::getARMCPUForArch(StringRef MArch) const { switch (getOS()) { case llvm::Triple::NetBSD: switch (getEnvironment()) { - case llvm::Triple::GNUEABIHF: - case llvm::Triple::GNUEABI: - case llvm::Triple::EABIHF: case llvm::Triple::EABI: + case llvm::Triple::EABIHF: + case llvm::Triple::GNUEABI: + case llvm::Triple::GNUEABIHF: return "arm926ej-s"; default: return "strongarm"; diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 97162ae221871..3b8f8a19fe49c 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -349,38 +349,22 @@ bool AArch64ExpandPseudo::expandSetTagLoop( MachineBasicBlock::iterator &NextMBBI) { MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); - Register SizeReg = MI.getOperand(0).getReg(); - Register AddressReg = MI.getOperand(1).getReg(); + Register SizeReg = MI.getOperand(2).getReg(); + Register AddressReg = MI.getOperand(3).getReg(); MachineFunction *MF = MBB.getParent(); bool ZeroData = MI.getOpcode() == AArch64::STZGloop; - const unsigned OpCode1 = - ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex; - const unsigned OpCode2 = + const unsigned OpCode = ZeroData ? 
                              AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
-  unsigned Size = MI.getOperand(2).getImm();
-  assert(Size > 0 && Size % 16 == 0);
-  if (Size % (16 * 2) != 0) {
-    BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
-        .addReg(AddressReg)
-        .addReg(AddressReg)
-        .addImm(1);
-    Size -= 16;
-  }
-  MachineBasicBlock::iterator I =
-      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
-          .addImm(Size);
-  expandMOVImm(MBB, I, 64);
-
   auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
 
   MF->insert(++MBB.getIterator(), LoopBB);
   MF->insert(++LoopBB->getIterator(), DoneBB);
 
-  BuildMI(LoopBB, DL, TII->get(OpCode2))
+  BuildMI(LoopBB, DL, TII->get(OpCode))
       .addDef(AddressReg)
       .addReg(AddressReg)
       .addReg(AddressReg)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 39d32863f15b2..c732106014e6c 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -170,11 +170,6 @@ static cl::opt<bool>
                          cl::desc("reverse the CSR restore sequence"),
                          cl::init(false), cl::Hidden);
 
-static cl::opt<bool> StackTaggingMergeSetTag(
-    "stack-tagging-merge-settag",
-    cl::desc("merge settag instruction in function epilog"), cl::init(true),
-    cl::Hidden);
-
 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
 
 /// This is the biggest offset to the stack pointer we can encode in aarch64
@@ -485,39 +480,6 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
   return true;
 }
 
-bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
-    MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
-  if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
-    return false;
-
-  if (MBB.empty())
-    return true;
-
-  // Disable combined SP bump if the last instruction is an MTE tag store. It
-  // is almost always better to merge SP adjustment into those instructions.
-  MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
-  MachineBasicBlock::iterator Begin = MBB.begin();
-  while (LastI != Begin) {
-    --LastI;
-    if (LastI->isTransient())
-      continue;
-    if (!LastI->getFlag(MachineInstr::FrameDestroy))
-      break;
-  }
-  switch (LastI->getOpcode()) {
-  case AArch64::STGloop:
-  case AArch64::STZGloop:
-  case AArch64::STGOffset:
-  case AArch64::STZGOffset:
-  case AArch64::ST2GOffset:
-  case AArch64::STZ2GOffset:
-    return false;
-  default:
-    return true;
-  }
-  llvm_unreachable("unreachable");
-}
-
 // Given a load or a store instruction, generate an appropriate unwinding SEH
 // code on Windows.
 static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
@@ -1497,7 +1459,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   // function.
   if (MF.hasEHFunclets())
     AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
-  bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
+  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
   // Assume we can't combine the last pop with the sp restore.
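// Illustrative only (not emitted verbatim by this pass): when the bump can be
// combined, the epilogue restores callee-saves and frees the frame in a single
// post-indexed load, e.g.
//   ldp x19, x20, [sp], #48   // restore CSRs, then sp += 48 in one step
// instead of an ldp followed by a separate 'add sp, sp, #48'.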
  if (!CombineSPBump && PrologueSaveSize != 0) {
@@ -2675,399 +2637,9 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
         .addImm(0);
 }
 
-namespace {
-struct TagStoreInstr {
-  MachineInstr *MI;
-  int64_t Offset, Size;
-  explicit TagStoreInstr(MachineInstr *MI, int64_t Offset, int64_t Size)
-      : MI(MI), Offset(Offset), Size(Size) {}
-};
-
-class TagStoreEdit {
-  MachineFunction *MF;
-  MachineBasicBlock *MBB;
-  MachineRegisterInfo *MRI;
-  // Tag store instructions that are being replaced.
-  SmallVector<TagStoreInstr, 8> TagStores;
-  // Combined memref arguments of the above instructions.
-  SmallVector<MachineMemOperand *, 8> CombinedMemRefs;
-
-  // Replace allocation tags in [FrameReg + FrameRegOffset, FrameReg +
-  // FrameRegOffset + Size) with the address tag of SP.
-  Register FrameReg;
-  StackOffset FrameRegOffset;
-  int64_t Size;
-  // If not None, move FrameReg to (FrameReg + FrameRegUpdate) at the end.
-  Optional<int64_t> FrameRegUpdate;
-  // MIFlags for any FrameReg updating instructions.
-  unsigned FrameRegUpdateFlags;
-
-  // Use zeroing instruction variants.
-  bool ZeroData;
-  DebugLoc DL;
-
-  void emitUnrolled(MachineBasicBlock::iterator InsertI);
-  void emitLoop(MachineBasicBlock::iterator InsertI);
-
-public:
-  TagStoreEdit(MachineBasicBlock *MBB, bool ZeroData)
-      : MBB(MBB), ZeroData(ZeroData) {
-    MF = MBB->getParent();
-    MRI = &MF->getRegInfo();
-  }
-  // Add an instruction to be replaced. Instructions must be added in the
-  // ascending order of Offset, and have to be adjacent.
-  void addInstruction(TagStoreInstr I) {
-    assert((TagStores.empty() ||
-            TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
-           "Non-adjacent tag store instructions.");
-    TagStores.push_back(I);
-  }
-  void clear() { TagStores.clear(); }
-  // Emit equivalent code at the given location, and erase the current set of
-  // instructions. May skip if the replacement is not profitable. May invalidate
-  // the input iterator and replace it with a valid one.
-  void emitCode(MachineBasicBlock::iterator &InsertI,
-                const AArch64FrameLowering *TFI, bool IsLast);
-};
-
-void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
-  const AArch64InstrInfo *TII =
-      MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
-
-  const int64_t kMinOffset = -256 * 16;
-  const int64_t kMaxOffset = 255 * 16;
-
-  Register BaseReg = FrameReg;
-  int64_t BaseRegOffsetBytes = FrameRegOffset.getBytes();
-  if (BaseRegOffsetBytes < kMinOffset ||
-      BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset) {
-    Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
-    emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg,
-                    {BaseRegOffsetBytes, MVT::i8}, TII);
-    BaseReg = ScratchReg;
-    BaseRegOffsetBytes = 0;
-  }
-
-  MachineInstr *LastI = nullptr;
-  while (Size) {
-    int64_t InstrSize = (Size > 16) ? 32 : 16;
-    unsigned Opcode =
-        InstrSize == 16
-            ? (ZeroData ? AArch64::STZGOffset : AArch64::STGOffset)
-            : (ZeroData ? AArch64::STZ2GOffset : AArch64::ST2GOffset);
-    MachineInstr *I = BuildMI(*MBB, InsertI, DL, TII->get(Opcode))
-                          .addReg(AArch64::SP)
-                          .addReg(BaseReg)
-                          .addImm(BaseRegOffsetBytes / 16)
-                          .setMemRefs(CombinedMemRefs);
-    // A store to [BaseReg, #0] should go last for an opportunity to fold the
-    // final SP adjustment in the epilogue.
-    if (BaseRegOffsetBytes == 0)
-      LastI = I;
-    BaseRegOffsetBytes += InstrSize;
-    Size -= InstrSize;
-  }
-
-  if (LastI)
-    MBB->splice(InsertI, MBB, LastI);
-}
-
-void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
-  const AArch64InstrInfo *TII =
-      MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
-
-  Register BaseReg = FrameRegUpdate
-                         ? FrameReg
-                         : MRI->createVirtualRegister(&AArch64::GPR64RegClass);
-  Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
-
-  emitFrameOffset(*MBB, InsertI, DL, BaseReg, FrameReg, FrameRegOffset, TII);
-
-  int64_t LoopSize = Size;
-  // If the loop size is not a multiple of 32, split off one 16-byte store at
-  // the end to fold BaseReg update into.
-  if (FrameRegUpdate && *FrameRegUpdate)
-    LoopSize -= LoopSize % 32;
-  MachineInstr *LoopI =
-      BuildMI(*MBB, InsertI, DL,
-              TII->get(ZeroData ? AArch64::STZGloop : AArch64::STGloop))
-          .addDef(SizeReg)
-          .addDef(BaseReg)
-          .addImm(LoopSize)
-          .addReg(BaseReg)
-          .setMemRefs(CombinedMemRefs);
-  if (FrameRegUpdate)
-    LoopI->setFlags(FrameRegUpdateFlags);
-
-  int64_t ExtraBaseRegUpdate =
-      FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getBytes() - Size) : 0;
-  if (LoopSize < Size) {
-    assert(FrameRegUpdate);
-    assert(Size - LoopSize == 16);
-    // Tag 16 more bytes at BaseReg and update BaseReg.
-    BuildMI(*MBB, InsertI, DL,
-            TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
-        .addDef(BaseReg)
-        .addReg(BaseReg)
-        .addReg(BaseReg)
-        .addImm(1 + ExtraBaseRegUpdate / 16)
-        .setMemRefs(CombinedMemRefs)
-        .setMIFlags(FrameRegUpdateFlags);
-  } else if (ExtraBaseRegUpdate) {
-    // Update BaseReg.
-    BuildMI(
-        *MBB, InsertI, DL,
-        TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
-        .addDef(BaseReg)
-        .addReg(BaseReg)
-        .addImm(std::abs(ExtraBaseRegUpdate))
-        .addImm(0)
-        .setMIFlags(FrameRegUpdateFlags);
-  }
-}
-
-// Check if *II is a register update that can be merged into STGloop that ends
-// at (Reg + Size). RemainingOffset is the required adjustment to Reg after the
-// end of the loop.
-bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
-                       int64_t Size, int64_t *TotalOffset) {
-  MachineInstr &MI = *II;
-  if ((MI.getOpcode() == AArch64::ADDXri ||
-       MI.getOpcode() == AArch64::SUBXri) &&
-      MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
-    unsigned Shift = AArch64_AM::getShiftValue(MI.getOperand(3).getImm());
-    int64_t Offset = MI.getOperand(2).getImm() << Shift;
-    if (MI.getOpcode() == AArch64::SUBXri)
-      Offset = -Offset;
-    int64_t AbsPostOffset = std::abs(Offset - Size);
-    const int64_t kMaxOffset =
-        0xFFF; // Max encoding for unshifted ADDXri / SUBXri
-    if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
-      *TotalOffset = Offset;
-      return true;
-    }
-  }
-  return false;
-}
-
-void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE,
-                  SmallVectorImpl<MachineMemOperand *> &MemRefs) {
-  MemRefs.clear();
-  for (auto &TS : TSE) {
-    MachineInstr *MI = TS.MI;
-    // An instruction without memory operands may access anything. Be
-    // conservative and return an empty list.
- if (MI->memoperands_empty()) { - MemRefs.clear(); - return; - } - MemRefs.append(MI->memoperands_begin(), MI->memoperands_end()); - } -} - -void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI, - const AArch64FrameLowering *TFI, bool IsLast) { - if (TagStores.empty()) - return; - TagStoreInstr &FirstTagStore = TagStores[0]; - TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1]; - Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size; - DL = TagStores[0].MI->getDebugLoc(); - - unsigned Reg; - FrameRegOffset = TFI->resolveFrameOffsetReference( - *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg, - /*PreferFP=*/false, /*ForSimm=*/true); - FrameReg = Reg; - FrameRegUpdate = None; - - mergeMemRefs(TagStores, CombinedMemRefs); - - LLVM_DEBUG(dbgs() << "Replacing adjacent STG instructions:\n"; - for (const auto &Instr - : TagStores) { dbgs() << " " << *Instr.MI; }); - - // Size threshold where a loop becomes shorter than a linear sequence of - // tagging instructions. - const int kSetTagLoopThreshold = 176; - if (Size < kSetTagLoopThreshold) { - if (TagStores.size() < 2) - return; - emitUnrolled(InsertI); - } else { - MachineInstr *UpdateInstr = nullptr; - int64_t TotalOffset; - if (IsLast) { - // See if we can merge base register update into the STGloop. - // This is done in AArch64LoadStoreOptimizer for "normal" stores, - // but STGloop is way too unusual for that, and also it only - // realistically happens in function epilogue. Also, STGloop is expanded - // before that pass. - if (InsertI != MBB->end() && - canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getBytes() + Size, - &TotalOffset)) { - UpdateInstr = &*InsertI++; - LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n " - << *UpdateInstr); - } - } - - if (!UpdateInstr && TagStores.size() < 2) - return; - - if (UpdateInstr) { - FrameRegUpdate = TotalOffset; - FrameRegUpdateFlags = UpdateInstr->getFlags(); - } - emitLoop(InsertI); - if (UpdateInstr) - UpdateInstr->eraseFromParent(); - } - - for (auto &TS : TagStores) - TS.MI->eraseFromParent(); -} - -bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset, - int64_t &Size, bool &ZeroData) { - MachineFunction &MF = *MI.getParent()->getParent(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - - unsigned Opcode = MI.getOpcode(); - ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGOffset || - Opcode == AArch64::STZ2GOffset); - - if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) { - if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead()) - return false; - if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI()) - return false; - Offset = MFI.getObjectOffset(MI.getOperand(3).getIndex()); - Size = MI.getOperand(2).getImm(); - return true; - } - - if (Opcode == AArch64::STGOffset || Opcode == AArch64::STZGOffset) - Size = 16; - else if (Opcode == AArch64::ST2GOffset || Opcode == AArch64::STZ2GOffset) - Size = 32; - else - return false; - - if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI()) - return false; - - Offset = MFI.getObjectOffset(MI.getOperand(1).getIndex()) + - 16 * MI.getOperand(2).getImm(); - return true; -} - -// Detect a run of memory tagging instructions for adjacent stack frame slots, -// and replace them with a shorter instruction sequence: -// * replace STG + STG with ST2G -// * replace STGloop + STGloop with STGloop -// This code needs to run when stack slot offsets are already known, but before -// FrameIndex operands in STG 
instructions are eliminated.
-MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
-                                                const AArch64FrameLowering *TFI,
-                                                RegScavenger *RS) {
-  bool FirstZeroData;
-  int64_t Size, Offset;
-  MachineInstr &MI = *II;
-  MachineBasicBlock *MBB = MI.getParent();
-  MachineBasicBlock::iterator NextI = ++II;
-  if (&MI == &MBB->instr_back())
-    return II;
-  if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))
-    return II;
-
-  SmallVector<TagStoreInstr, 8> Instrs;
-  Instrs.emplace_back(&MI, Offset, Size);
-
-  constexpr int kScanLimit = 10;
-  int Count = 0;
-  for (MachineBasicBlock::iterator E = MBB->end();
-       NextI != E && Count < kScanLimit; ++NextI) {
-    MachineInstr &MI = *NextI;
-    bool ZeroData;
-    int64_t Size, Offset;
-    // Collect instructions that update memory tags with a FrameIndex operand
-    // and (when applicable) constant size, and whose output registers are dead
-    // (the latter is almost always the case in practice). Since these
-    // instructions effectively have no inputs or outputs, we are free to skip
-    // any non-aliasing instructions in between without tracking used registers.
-    if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {
-      if (ZeroData != FirstZeroData)
-        break;
-      Instrs.emplace_back(&MI, Offset, Size);
-      continue;
-    }
-
-    // Only count non-transient, non-tagging instructions toward the scan
-    // limit.
-    if (!MI.isTransient())
-      ++Count;
-
-    // Just in case, stop before the epilogue code starts.
-    if (MI.getFlag(MachineInstr::FrameSetup) ||
-        MI.getFlag(MachineInstr::FrameDestroy))
-      break;
-
-    // Reject anything that may alias the collected instructions.
-    if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects())
-      break;
-  }
-
-  // New code will be inserted after the last tagging instruction we've found.
-  MachineBasicBlock::iterator InsertI = Instrs.back().MI;
-  InsertI++;
-
-  llvm::stable_sort(Instrs,
-                    [](const TagStoreInstr &Left, const TagStoreInstr &Right) {
-                      return Left.Offset < Right.Offset;
-                    });
-
-  // Make sure that we don't have any overlapping stores.
-  int64_t CurOffset = Instrs[0].Offset;
-  for (auto &Instr : Instrs) {
-    if (CurOffset > Instr.Offset)
-      return NextI;
-    CurOffset = Instr.Offset + Instr.Size;
-  }
-
-  // Find contiguous runs of tagged memory and emit shorter instruction
-  // sequences for them when possible.
-  TagStoreEdit TSE(MBB, FirstZeroData);
-  Optional<int64_t> EndOffset;
-  for (auto &Instr : Instrs) {
-    if (EndOffset && *EndOffset != Instr.Offset) {
-      // Found a gap.
-      TSE.emitCode(InsertI, TFI, /*IsLast = */ false);
-      TSE.clear();
-    }
-
-    TSE.addInstruction(Instr);
-    EndOffset = Instr.Offset + Instr.Size;
-  }
-
-  TSE.emitCode(InsertI, TFI, /*IsLast = */ true);
-
-  return InsertI;
-}
-} // namespace
-
-void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
-    MachineFunction &MF, RegScavenger *RS = nullptr) const {
-  if (StackTaggingMergeSetTag)
-    for (auto &BB : MF)
-      for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();)
-        II = tryMergeAdjacentSTG(II, this, RS);
-}
-
-/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP
-/// before the update. This is easily retrieved as it is exactly the offset
-/// that is set in processFunctionBeforeFrameFinalized.
+/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before
+/// the update. This is easily retrieved as it is exactly the offset that is set
+/// in processFunctionBeforeFrameFinalized.
int AArch64FrameLowering::getFrameIndexReferencePreferSP( const MachineFunction &MF, int FI, unsigned &FrameReg, bool IgnoreSPUpdates) const { diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index 57a7924fb8f8f..b5719feb6b154 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -77,10 +77,6 @@ class AArch64FrameLowering : public TargetFrameLowering { void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override; - void - processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, - RegScavenger *RS) const override; - unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override; unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const; @@ -111,8 +107,6 @@ class AArch64FrameLowering : public TargetFrameLowering { int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF, int &MinCSFrameIndex, int &MaxCSFrameIndex) const; - bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB, - unsigned StackBumpBytes) const; }; } // End llvm namespace diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 0ed2a678c4f01..54f3f7c101324 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3458,8 +3458,6 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, case AArch64::ST1Fourv1d: case AArch64::IRG: case AArch64::IRGstack: - case AArch64::STGloop: - case AArch64::STZGloop: return AArch64FrameOffsetCannotUpdate; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 04a23f31ffd60..f4d340c9f06a0 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1514,17 +1514,17 @@ def TAGPstack // register / expression for the tagged base pointer of the current function. def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>; -// Large STG to be expanded into a loop. $sz is the size, $Rn is start address. -// $Rn_wback is one past the end of the range. $Rm is the loop counter. +// Large STG to be expanded into a loop. $Rm is the size, $Rn is start address. +// $Rn_wback is one past the end of the range. 
let isCodeGenOnly=1, mayStore=1 in { def STGloop - : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), - [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, + : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn), + [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >, Sched<[WriteAdr, WriteST]>; def STZGloop - : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), - [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, + : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn), + [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >, Sched<[WriteAdr, WriteST]>; } diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 4a3778a2fd072..14f839cd4f812 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -390,10 +390,6 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, if (isFrameOffsetLegal(MI, AArch64::SP, Offset)) return false; - // If even offset 0 is illegal, we don't want a virtual base register. - if (!isFrameOffsetLegal(MI, AArch64::SP, 0)) - return false; - // The offset likely isn't legal; we want to allocate a virtual base register. return true; } @@ -449,17 +445,6 @@ void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, (void)Done; } -static Register getScratchRegisterForInstruction(MachineInstr &MI) { - // ST*Gloop can only have #fi in op3, and they have a constraint that - // op1==op3. Since op1 is early-clobber, it may (and also must) be used as the - // scratch register. - if (MI.getOpcode() == AArch64::STGloop || MI.getOpcode() == AArch64::STZGloop) - return MI.getOperand(1).getReg(); - else - return MI.getMF()->getRegInfo().createVirtualRegister( - &AArch64::GPR64RegClass); -} - void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { @@ -516,7 +501,8 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // in a scratch register. Offset = TFI->resolveFrameIndexReference( MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true); - Register ScratchReg = getScratchRegisterForInstruction(MI); + Register ScratchReg = + MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII); BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AArch64::LDG), ScratchReg) @@ -545,7 +531,8 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // If we get here, the immediate doesn't fit into the instruction. We folded // as much as possible above. Handle the rest, providing a register that is // SP+LargeImm. 
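// Illustrative expansion (register names are placeholders, not chosen here):
//   mov  x9, #LargeOffset      // materialize the out-of-range offset
//   add  x9, fp, x9            // ScratchReg = FrameReg + Offset
// after which the frame-index operand below is rewritten to use x9 directly.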
-  Register ScratchReg = getScratchRegisterForInstruction(MI);
+  Register ScratchReg =
+      MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
   emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII);
   MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true);
 }
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index e050a0028eca6..ba61ed726e840 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -125,13 +125,19 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForSetTag(
     return EmitUnrolledSetTag(DAG, dl, Chain, Addr, ObjSize, BaseMemOperand,
                               ZeroData);
 
-  const EVT ResTys[] = {MVT::i64, MVT::i64, MVT::Other};
-
-  if (Addr.getOpcode() == ISD::FrameIndex) {
-    int FI = cast<FrameIndexSDNode>(Addr)->getIndex();
-    Addr = DAG.getTargetFrameIndex(FI, MVT::i64);
+  if (ObjSize % 32 != 0) {
+    SDNode *St1 = DAG.getMachineNode(
+        ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex, dl,
+        {MVT::i64, MVT::Other},
+        {Addr, Addr, DAG.getTargetConstant(1, dl, MVT::i64), Chain});
+    DAG.setNodeMemRefs(cast<MachineSDNode>(St1), {BaseMemOperand});
+    ObjSize -= 16;
+    Addr = SDValue(St1, 0);
+    Chain = SDValue(St1, 1);
   }
-  SDValue Ops[] = {DAG.getTargetConstant(ObjSize, dl, MVT::i64), Addr, Chain};
+
+  const EVT ResTys[] = {MVT::i64, MVT::i64, MVT::Other};
+  SDValue Ops[] = {DAG.getConstant(ObjSize, dl, MVT::i64), Addr, Chain};
 
   SDNode *St = DAG.getMachineNode(
       ZeroData ? AArch64::STZGloop : AArch64::STGloop, dl, ResTys, Ops);
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index bd48e5d846af3..70c9db13f139d 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -1223,7 +1223,7 @@ class AArch64Operand : public MCParsedAsmOperand {
       APFloat RealVal(APFloat::IEEEdouble());
       auto StatusOrErr =
           RealVal.convertFromString(Desc->Repr, APFloat::rmTowardZero);
-      if (!StatusOrErr || *StatusOrErr != APFloat::opOK)
+      if (errorToBool(StatusOrErr.takeError()) || *StatusOrErr != APFloat::opOK)
         llvm_unreachable("FP immediate is not exact");
 
       if (getFPImm().bitwiseIsEqual(RealVal))
@@ -2580,7 +2580,7 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
     APFloat RealVal(APFloat::IEEEdouble());
     auto StatusOrErr =
         RealVal.convertFromString(Tok.getString(), APFloat::rmTowardZero);
-    if (!StatusOrErr) {
+    if (errorToBool(StatusOrErr.takeError())) {
       TokError("invalid floating point representation");
       return MatchOperand_ParseFail;
     }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index d7c211f1ed930..f9983693a99ec 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -30,6 +30,10 @@ def gi_vop3mods :
     GIComplexOperandMatcher<s32, "selectVOP3Mods">,
     GIComplexPatternEquiv<VOP3Mods>;
 
+def gi_vop3mods_nnan :
+    GIComplexOperandMatcher<s32, "selectVOP3Mods_nnan">,
+    GIComplexPatternEquiv<VOP3Mods_nnan>;
+
 def gi_vop3omods :
     GIComplexOperandMatcher<s32, "selectVOP3OMods">,
     GIComplexPatternEquiv<VOP3OMods>;
 
@@ -204,3 +208,12 @@ foreach Ty = [i64, p0, p1, p4] in {
 
 def gi_as_i32timm : GICustomOperandRenderer<"renderTruncImm32">,
     GISDNodeXFormEquiv<as_i32timm>;
+
+def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">,
+    GISDNodeXFormEquiv<NegateImm>;
+
+def gi_bitcast_fpimm_to_i32 : GICustomOperandRenderer<"renderBitcastImm">,
+    GISDNodeXFormEquiv<bitcast_fpimm_to_i32>;
+
+def gi_IMMPopCount : GICustomOperandRenderer<"renderPopcntImm">,
+    GISDNodeXFormEquiv<IMMPopCount>;
diff --git
a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index a41c8f1a6a3f3..132c51c9e08f1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1684,6 +1684,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { case TargetOpcode::G_SEXT: case TargetOpcode::G_ZEXT: case TargetOpcode::G_ANYEXT: + if (selectImpl(I, *CoverageInfo)) + return true; return selectG_SZA_EXT(I); case TargetOpcode::G_BRCOND: return selectG_BRCOND(I); @@ -1770,6 +1772,20 @@ AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const { }}; } +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVOP3Mods_nnan(MachineOperand &Root) const { + Register Src; + unsigned Mods; + std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg()); + if (!TM.Options.NoNaNsFPMath && !isKnownNeverNaN(Src, *MRI)) + return None; + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods + }}; +} + InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectVOP3OpSelMods0(MachineOperand &Root) const { // FIXME: Handle clamp and op_sel @@ -2097,6 +2113,29 @@ void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB, MIB.addImm(CstVal.getValue()); } +void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB, + const MachineInstr &MI) const { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue()); +} + +void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB, + const MachineInstr &MI) const { + const MachineOperand &Op = MI.getOperand(1); + if (MI.getOpcode() == TargetOpcode::G_FCONSTANT) + MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); + else { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + MIB.addImm(Op.getCImm()->getSExtValue()); + } +} + +void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB, + const MachineInstr &MI) const { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + MIB.addImm(MI.getOperand(1).getCImm()->getValue().countPopulation()); +} + bool AMDGPUInstructionSelector::isInlineImmediate16(int64_t Imm) const { return AMDGPU::isInlinableLiteral16(Imm, STI.hasInv2PiInlineImm()); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 45782ab3185ef..0799ace086756 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -132,6 +132,8 @@ class AMDGPUInstructionSelector : public InstructionSelector { selectVOP3OMods(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns selectVOP3Mods(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectVOP3Mods_nnan(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns selectVOP3OpSelMods0(MachineOperand &Root) const; @@ -169,6 +171,15 @@ class AMDGPUInstructionSelector : public InstructionSelector { void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI) const; + void renderNegateImm(MachineInstrBuilder &MIB, + const MachineInstr &MI) const; + + void renderBitcastImm(MachineInstrBuilder &MIB, + const MachineInstr &MI) const; + + void renderPopcntImm(MachineInstrBuilder &MIB, + const MachineInstr &MI) const; + 
   bool isInlineImmediate16(int64_t Imm) const;
   bool isInlineImmediate32(int64_t Imm) const;
   bool isInlineImmediate64(int64_t Imm) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 89e52d63af2d2..7e71dbdd12408 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -336,15 +336,10 @@ class Aligned<int Bytes> {
   int MinAlignment = Bytes;
 }
 
-class LoadFrag<SDPatternOperator op> : PatFrag<(ops node:$ptr), (op node:$ptr)>;
-
-class StoreFrag<SDPatternOperator op> : PatFrag <
-  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
->;
-
 class StoreHi16<SDPatternOperator op> : PatFrag <
-  (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)
->;
+  (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)> {
+  let IsStore = 1;
+}
 
 def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant ]>;
 def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global, AddrSpaces.Constant ]>;
@@ -366,48 +361,6 @@ def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
 
-class GlobalLoadAddress : CodePatPred<[{
-  auto AS = cast<MemSDNode>(N)->getAddressSpace();
-  return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS;
-}]>;
-
-class FlatLoadAddress : CodePatPred<[{
-  const auto AS = cast<MemSDNode>(N)->getAddressSpace();
-  return AS == AMDGPUAS::FLAT_ADDRESS ||
-         AS == AMDGPUAS::GLOBAL_ADDRESS ||
-         AS == AMDGPUAS::CONSTANT_ADDRESS;
-}]>;
-
-class GlobalAddress : CodePatPred<[{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
-}]>;
-
-class PrivateAddress : CodePatPred<[{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
-}]>;
-
-class LocalAddress : CodePatPred<[{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
-}]>;
-
-class RegionAddress : CodePatPred<[{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
-}]>;
-
-class FlatStoreAddress : CodePatPred<[{
-  const auto AS = cast<MemSDNode>(N)->getAddressSpace();
-  return AS == AMDGPUAS::FLAT_ADDRESS ||
-         AS == AMDGPUAS::GLOBAL_ADDRESS;
-}]>;
-
-// TODO: Remove these when stores to new PatFrag format.
-class PrivateStore<SDPatternOperator op> : StoreFrag<op>, PrivateAddress;
-class LocalStore<SDPatternOperator op> : StoreFrag<op>, LocalAddress;
-class RegionStore<SDPatternOperator op> : StoreFrag<op>, RegionAddress;
-class GlobalStore<SDPatternOperator op> : StoreFrag<op>, GlobalAddress;
-class FlatStore<SDPatternOperator op> : StoreFrag<op>, FlatStoreAddress;
-
-
 foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
 let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
 
@@ -485,6 +438,10 @@ def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
   let MemoryVT = i16;
 }
 
+def store_hi16_#as : StoreHi16 <store>;
+def truncstorei8_hi16_#as : StoreHi16<truncstorei8>;
+def truncstorei16_hi16_#as : StoreHi16<truncstorei16>;
+
 defm atomic_store_#as : binary_atomic_op<atomic_store>;
 
 } // End let AddressSpaces = ...
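For orientation, a minimal sketch (hypothetical fragment name, not part of the patch) of how a store fragment now gets its address-space restriction from the AddressSpaces field rather than from a separate *Address predicate class:

// Hypothetical example: an i8 truncating store restricted to the local (LDS)
// address space under the new PatFrag scheme.
let AddressSpaces = StoreAddress_local.AddrSpaces in {
def truncstorei8_example_local : PatFrag <
  (ops node:$val, node:$ptr), (truncstorei8 node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i8;
}
}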
@@ -521,18 +478,6 @@ defm atomic_load_fadd : ret_noret_binary_atomic_op; defm AMDGPUatomic_cmp_swap : ret_noret_binary_atomic_op; -def store_hi16_private : StoreHi16 , PrivateAddress; -def truncstorei8_hi16_private : StoreHi16, PrivateAddress; - -def store_atomic_global : GlobalStore; -def truncstorei8_hi16_global : StoreHi16 , GlobalAddress; -def truncstorei16_hi16_global : StoreHi16 , GlobalAddress; - -def store_local_hi16 : StoreHi16 , LocalAddress; -def truncstorei8_local_hi16 : StoreHi16, LocalAddress; -def atomic_store_local : LocalStore ; - - def load_align8_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> { let IsLoad = 1; let IsNonExtLoad = 1; @@ -557,30 +502,6 @@ def store_align16_local: PatFrag<(ops node:$val, node:$ptr), let IsTruncStore = 0; } - -def atomic_store_flat : FlatStore ; -def truncstorei8_hi16_flat : StoreHi16, FlatStoreAddress; -def truncstorei16_hi16_flat : StoreHi16, FlatStoreAddress; - - -class local_binary_atomic_op : - PatFrag<(ops node:$ptr, node:$value), - (atomic_op node:$ptr, node:$value), [{ - return cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; -}]>; - -class region_binary_atomic_op : - PatFrag<(ops node:$ptr, node:$value), - (atomic_op node:$ptr, node:$value), [{ - return cast(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS; -}]>; - - -def mskor_global : PatFrag<(ops node:$val, node:$ptr), - (AMDGPUstore_mskor node:$val, node:$ptr), [{ - return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; -}]>; - let AddressSpaces = StoreAddress_local.AddrSpaces in { defm atomic_cmp_swap_local : ternary_atomic_op; defm atomic_cmp_swap_local_m0 : ternary_atomic_op; @@ -591,17 +512,6 @@ defm atomic_cmp_swap_region : ternary_atomic_op; defm atomic_cmp_swap_region_m0 : ternary_atomic_op; } -// Legacy. -def atomic_cmp_swap_global_noret : PatFrag< - (ops node:$ptr, node:$cmp, node:$value), - (atomic_cmp_swap node:$ptr, node:$cmp, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>; - -def atomic_cmp_swap_global_ret : PatFrag< - (ops node:$ptr, node:$cmp, node:$value), - (atomic_cmp_swap node:$ptr, node:$cmp, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>; - //===----------------------------------------------------------------------===// // Misc Pattern Fragments //===----------------------------------------------------------------------===// @@ -827,30 +737,6 @@ class ROTRPattern : AMDGPUPat < (BIT_ALIGN $src0, $src0, $src1) >; -multiclass IntMed3Pat { - - // This matches 16 permutations of - // min(max(a, b), max(min(a, b), c)) - def : AMDGPUPat < - (min (max_oneuse vt:$src0, vt:$src1), - (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)), - (med3Inst vt:$src0, vt:$src1, vt:$src2) ->; - - // This matches 16 permutations of - // max(min(x, y), min(max(x, y), z)) - def : AMDGPUPat < - (max (min_oneuse vt:$src0, vt:$src1), - (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)), - (med3Inst $src0, $src1, $src2) ->; -} - // Special conversion patterns def cvt_rpi_i32_f32 : PatFrag < diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 326df6bc8fb2b..d5834826fcd8b 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -2363,7 +2363,7 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { APFloat RealVal(APFloat::IEEEdouble()); auto roundMode = 
APFloat::rmNearestTiesToEven; - if (!RealVal.convertFromString(Num, roundMode)) { + if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { return MatchOperand_ParseFail; } if (Negate) diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 1b12550aed88f..691aff4ecbb8a 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1621,8 +1621,8 @@ multiclass MUBUFStore_Atomic_Pattern ; } let SubtargetPredicate = isGFX6GFX7 in { -defm : MUBUFStore_Atomic_Pattern ; -defm : MUBUFStore_Atomic_Pattern ; +defm : MUBUFStore_Atomic_Pattern ; +defm : MUBUFStore_Atomic_Pattern ; } // End Predicates = isGFX6GFX7 diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index f008b800bd327..f4e50e3a15e9a 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -619,7 +619,7 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">; def : GCNPat < (int_amdgcn_ds_swizzle i32:$src, timm:$offset16), - (DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0)) + (DS_SWIZZLE_B32 VGPR_32:$src, (as_i16imm $offset16), (i1 0)) >; class DSReadPat : GCNPat < @@ -733,8 +733,8 @@ defm : DSAtomicWritePat_mc ; defm : DSAtomicWritePat_mc ; let OtherPredicates = [D16PreservesUnusedBits] in { -def : DSWritePat ; -def : DSWritePat ; +def : DSWritePat ; +def : DSWritePat ; } diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index e106af42deddb..2057cac346d45 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -896,8 +896,8 @@ def : FlatSignedLoadPat_D16 ; def : FlatSignedLoadPat_D16 ; } -def : FlatStoreSignedAtomicPat ; -def : FlatStoreSignedAtomicPat ; +def : FlatStoreSignedAtomicPat ; +def : FlatStoreSignedAtomicPat ; def : FlatSignedAtomicPat ; def : FlatSignedAtomicPat ; diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index f40eece859ee7..cbdf0de44f873 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -295,9 +295,23 @@ class VTX_READ pattern> let VTXInst = 1; } -// FIXME: Deprecated. -class LocalLoad : LoadFrag , LocalAddress; +// Legacy. 
+def atomic_cmp_swap_global_noret : PatFrag< + (ops node:$ptr, node:$cmp, node:$value), + (atomic_cmp_swap node:$ptr, node:$cmp, node:$value), + [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>; + +def atomic_cmp_swap_global_ret : PatFrag< + (ops node:$ptr, node:$cmp, node:$value), + (atomic_cmp_swap node:$ptr, node:$cmp, node:$value), + [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>; + +def mskor_global : PatFrag<(ops node:$val, node:$ptr), + (AMDGPUstore_mskor node:$val, node:$ptr), [{ + return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; +}]>; +// FIXME: These are deprecated class AZExtLoadBase : PatFrag<(ops node:$ptr), (ld_node node:$ptr), [{ LoadSDNode *L = cast(N); @@ -319,9 +333,10 @@ def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; }]>; -// FIXME: These are deprecated -def az_extloadi8_local : LocalLoad ; -def az_extloadi16_local : LocalLoad ; +let AddressSpaces = LoadAddress_local.AddrSpaces in { +def az_extloadi8_local : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr)>; +def az_extloadi16_local : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr)>; +} class LoadParamFrag : PatFrag < (ops node:$ptr), (load_type node:$ptr), diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 609a345ea18ca..1518beafc7aba 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -579,46 +579,37 @@ def si_setcc_uniform : PatFrag < // SDNodes PatFrags for d16 loads //===----------------------------------------------------------------------===// -class LoadD16Frag : PatFrag<(ops node:$ptr, node:$tied_in), (op node:$ptr, node:$tied_in)>; -class LocalLoadD16 : LoadD16Frag , LocalAddress; -class GlobalLoadD16 : LoadD16Frag , GlobalLoadAddress; -class PrivateLoadD16 : LoadD16Frag , PrivateAddress; -class FlatLoadD16 : LoadD16Frag , FlatLoadAddress; - -def load_d16_hi_local : LocalLoadD16 ; -def az_extloadi8_d16_hi_local : LocalLoadD16 ; -def sextloadi8_d16_hi_local : LocalLoadD16 ; - -def load_d16_hi_global : GlobalLoadD16 ; -def az_extloadi8_d16_hi_global : GlobalLoadD16 ; -def sextloadi8_d16_hi_global : GlobalLoadD16 ; - -def load_d16_hi_private : PrivateLoadD16 ; -def az_extloadi8_d16_hi_private : PrivateLoadD16 ; -def sextloadi8_d16_hi_private : PrivateLoadD16 ; +class LoadD16Frag : PatFrag< + (ops node:$ptr, node:$tied_in), + (op node:$ptr, node:$tied_in)> { + let IsLoad = 1; +} -def load_d16_hi_flat : FlatLoadD16 ; -def az_extloadi8_d16_hi_flat : FlatLoadD16 ; -def sextloadi8_d16_hi_flat : FlatLoadD16 ; +foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in { +let AddressSpaces = !cast("LoadAddress_"#as).AddrSpaces in { +def load_d16_hi_#as : LoadD16Frag ; -def load_d16_lo_local : LocalLoadD16 ; -def az_extloadi8_d16_lo_local : LocalLoadD16 ; -def sextloadi8_d16_lo_local : LocalLoadD16 ; +def az_extloadi8_d16_hi_#as : LoadD16Frag { + let MemoryVT = i8; +} -def load_d16_lo_global : GlobalLoadD16 ; -def az_extloadi8_d16_lo_global : GlobalLoadD16 ; -def sextloadi8_d16_lo_global : GlobalLoadD16 ; +def sextloadi8_d16_hi_#as : LoadD16Frag { + let MemoryVT = i8; +} -def load_d16_lo_private : PrivateLoadD16 ; -def az_extloadi8_d16_lo_private : PrivateLoadD16 ; -def sextloadi8_d16_lo_private : PrivateLoadD16 ; +def load_d16_lo_#as : LoadD16Frag ; -def load_d16_lo_flat : FlatLoadD16 ; -def az_extloadi8_d16_lo_flat : 
FlatLoadD16 ; -def sextloadi8_d16_lo_flat : FlatLoadD16 ; +def az_extloadi8_d16_lo_#as : LoadD16Frag { + let MemoryVT = i8; +} +def sextloadi8_d16_lo_#as : LoadD16Frag { + let MemoryVT = i8; +} +} // End let AddressSpaces = ... +} // End foreach AddrSpace def lshr_rev : PatFrag < (ops node:$src1, node:$src0), diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index dcc139a9fe943..d84720f820ee3 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1930,9 +1930,22 @@ def : GCNPat < // TODO: Also do for 64-bit. def : GCNPat< (add i32:$src0, (i32 NegSubInlineConst32:$src1)), - (S_SUB_I32 $src0, NegSubInlineConst32:$src1) + (S_SUB_I32 SReg_32:$src0, NegSubInlineConst32:$src1) >; +def : GCNPat< + (add i32:$src0, (i32 NegSubInlineConst32:$src1)), + (V_SUB_U32_e64 VS_32:$src0, NegSubInlineConst32:$src1)> { + let SubtargetPredicate = HasAddNoCarryInsts; +} + +def : GCNPat< + (add i32:$src0, (i32 NegSubInlineConst32:$src1)), + (V_SUB_I32_e64 VS_32:$src0, NegSubInlineConst32:$src1)> { + let SubtargetPredicate = NotHasAddNoCarryInsts; +} + + // Avoid pointlessly materializing a constant in VGPR. // FIXME: Should also do this for readlane, but tablegen crashes on // the ignored src1. @@ -1959,6 +1972,29 @@ defm : BFMPatterns ; defm : BFEPattern ; defm : SHA256MaPattern ; +multiclass IntMed3Pat { + + // This matches 16 permutations of + // min(max(a, b), max(min(a, b), c)) + def : AMDGPUPat < + (min (max_oneuse i32:$src0, i32:$src1), + (max_oneuse (min_oneuse i32:$src0, i32:$src1), i32:$src2)), + (med3Inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2) +>; + + // This matches 16 permutations of + // max(min(x, y), min(max(x, y), z)) + def : AMDGPUPat < + (max (min_oneuse i32:$src0, i32:$src1), + (min_oneuse (max_oneuse i32:$src0, i32:$src1), i32:$src2)), + (med3Inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2) +>; +} + defm : IntMed3Pat; defm : IntMed3Pat; @@ -1989,22 +2025,21 @@ multiclass Int16Med3Pat { + SDPatternOperator min_oneuse> { // This matches 16 permutations of // max(min(x, y), min(max(x, y), z)) def : GCNPat < - (max (min_oneuse vt:$src0, vt:$src1), - (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)), - (med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE) + (max (min_oneuse i16:$src0, i16:$src1), + (min_oneuse (max_oneuse i16:$src0, i16:$src1), i16:$src2)), + (med3Inst SRCMODS.NONE, VSrc_b16:$src0, SRCMODS.NONE, VSrc_b16:$src1, SRCMODS.NONE, VSrc_b16:$src2, DSTCLAMP.NONE) >; // This matches 16 permutations of // min(max(a, b), max(min(a, b), c)) def : GCNPat < - (min (max_oneuse vt:$src0, vt:$src1), - (max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)), - (med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE) + (min (max_oneuse i16:$src0, i16:$src1), + (max_oneuse (min_oneuse i16:$src0, i16:$src1), i16:$src2)), + (med3Inst SRCMODS.NONE, VSrc_b16:$src0, SRCMODS.NONE, VSrc_b16:$src1, SRCMODS.NONE, VSrc_b16:$src2, DSTCLAMP.NONE) >; } diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 064b26665542f..aaadc3dbc7215 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -729,7 +729,7 @@ multiclass Arithmetic_i16_0Hi_Pats { def : GCNPat< (i32 (zext (op i16:$src0, i16:$src1))), - (inst $src0, $src1) + (inst VSrc_b16:$src0, VSrc_b16:$src1) >; def : GCNPat< @@ -771,7 +771,7 @@ let Predicates = [Has16BitInsts] in { // 
TODO: Also do for 64-bit. def : GCNPat< (add i16:$src0, (i16 NegSubInlineConst16:$src1)), - (V_SUB_U16_e64 $src0, NegSubInlineConst16:$src1) + (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineConst16:$src1) >; @@ -779,7 +779,7 @@ let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in { def : GCNPat< (i32 (zext (add i16:$src0, (i16 NegSubInlineConst16:$src1)))), - (V_SUB_U16_e64 $src0, NegSubInlineConst16:$src1) + (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineConst16:$src1) >; defm : Arithmetic_i16_0Hi_Pats; diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 31a98d86a54d2..6c45eecf0c23d 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -19,6 +19,22 @@ /// which determines whether we can generated the tail-predicated low-overhead /// loop form. /// +/// Assumptions and Dependencies: +/// Low-overhead loops are constructed and executed using a setup instruction: +/// DLS, WLS, DLSTP or WLSTP and an instruction that loops back: LE or LETP. +/// WLS(TP) and LE(TP) are branching instructions with a (large) limited range +/// but fixed polarity: WLS can only branch forwards and LE can only branch +/// backwards. These restrictions mean that this pass is dependent upon block +/// layout and block sizes, which is why it's the last pass to run. The same is +/// true for ConstantIslands, but this pass does not increase the size of the +/// basic blocks, nor does it change the CFG. Instructions are mainly removed +/// during the transform and pseudo instructions are replaced by real ones. In +/// some cases, when we have to revert to a 'normal' loop, we have to introduce +/// multiple instructions for a single pseudo (see RevertWhile and +/// RevertLoopEnd). To handle this situation, t2WhileLoopStart and t2LoopEnd +/// are defined to be as large as this maximum sequence of replacement +/// instructions. +/// //===----------------------------------------------------------------------===// #include "ARM.h" diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 41ad8b0c04de4..7ff05034c1f25 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1235,6 +1235,11 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, unsigned Cost = 0; for (auto *BB : L->getBlocks()) { for (auto &I : *BB) { + // Don't unroll vectorised loop. MVE does not benefit from it as much as + // scalar code. + if (I.getType()->isVectorTy()) + return; + if (isa(I) || isa(I)) { ImmutableCallSite CS(&I); if (const Function *F = CS.getCalledFunction()) { @@ -1243,10 +1248,6 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, } return; } - // Don't unroll vectorised loop. MVE does not benefit from it as much as - // scalar code. 
-      if (I.getType()->isVectorTy())
-        return;
 
       SmallVector<const Value *, 4> Operands(I.value_op_begin(),
                                              I.value_op_end());
diff --git a/llvm/lib/Target/LLVMBuild.txt b/llvm/lib/Target/LLVMBuild.txt
index d6a95a3c67133..7403f7713a9f6 100644
--- a/llvm/lib/Target/LLVMBuild.txt
+++ b/llvm/lib/Target/LLVMBuild.txt
@@ -36,6 +36,7 @@ subdirectories =
  WebAssembly
  X86
  XCore
+ VE
 
; This is a special group whose required libraries are extended (by llvm-build)
; with the best execution engine (the native JIT, if available, or the
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 1b67e1e55bf78..74192cb20cd05 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -897,6 +897,8 @@ bool PPCMIPeephole::simplifyCode(void) {
         bool Is64Bit = (MI.getOpcode() == PPC::RLWINM8 ||
                         MI.getOpcode() == PPC::RLWINM8_rec);
 
+        Simplified = true;
+
         LLVM_DEBUG(dbgs() << "Replace Instr: ");
         LLVM_DEBUG(MI.dump());
 
@@ -913,9 +915,14 @@ bool PPCMIPeephole::simplifyCode(void) {
             MI.RemoveOperand(3);
             MI.getOperand(2).setImm(0);
             MI.setDesc(TII->get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
+            MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+            if (SrcMI->getOperand(1).isKill()) {
+              MI.getOperand(1).setIsKill(true);
+              SrcMI->getOperand(1).setIsKill(false);
+            } else
+              // About to replace MI.getOperand(1), clear its kill flag.
+              MI.getOperand(1).setIsKill(false);
           }
-          Simplified = true;
-          NumRotatesCollapsed++;
 
           LLVM_DEBUG(dbgs() << "With: ");
           LLVM_DEBUG(MI.dump());
@@ -925,16 +932,7 @@
           // than NewME. Otherwise we get a 64 bit value after folding, but MI
           // returns a 32 bit value.
 
-          // If FoldingReg has only one use and it is not RLWINM_rec and
-          // RLWINM8_rec, it is safe to delete its def SrcMI. Otherwise keep it.
-          if (MRI->hasOneNonDBGUse(FoldingReg) &&
-              (SrcMI->getOpcode() == PPC::RLWINM ||
-               SrcMI->getOpcode() == PPC::RLWINM8)) {
-            ToErase = SrcMI;
-            LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
-            LLVM_DEBUG(SrcMI->dump());
-          }
-
+          Simplified = true;
           LLVM_DEBUG(dbgs() << "Converting Instr: ");
           LLVM_DEBUG(MI.dump());
 
@@ -953,12 +951,20 @@
           // About to replace MI.getOperand(1), clear its kill flag.
           MI.getOperand(1).setIsKill(false);
 
-          Simplified = true;
-          NumRotatesCollapsed++;
-
           LLVM_DEBUG(dbgs() << "To: ");
           LLVM_DEBUG(MI.dump());
         }
+        if (Simplified) {
+          // If FoldingReg has no non-debug use and it has no implicit def (it
+          // is not RLWINM_rec or RLWINM8_rec), it's safe to delete its def
+          // SrcMI. Otherwise keep it.
+          ++NumRotatesCollapsed;
+          if (MRI->use_nodbg_empty(FoldingReg) && !SrcMI->hasImplicitDef()) {
+            ToErase = SrcMI;
+            LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
+            LLVM_DEBUG(SrcMI->dump());
+          }
+        }
         break;
       }
     }
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index dc19cb0ac3093..77122e62dd5fb 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -151,6 +151,9 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
       TargetTriple.isMusl())
     SecurePlt = true;
 
+  if (TargetTriple.getSubArch() == Triple::PPCSubArch_spe)
+    HasSPE = true;
+
   if (HasSPE && IsPPC64)
     report_fatal_error( "SPE is only supported for 32-bit targets.\n", false);
   if (HasSPE && (HasAltivec || HasQPX || HasVSX || HasFPU))
diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
index 97a1eb2f190a9..f070b143d5b4e 100644
--- a/llvm/lib/Target/TargetMachine.cpp
+++ b/llvm/lib/Target/TargetMachine.cpp
@@ -12,6 +12,7 @@
 
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/MIRFormatter.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalValue.h"
@@ -37,7 +38,9 @@ TargetMachine::TargetMachine(const Target &T, StringRef DataLayoutString,
     : TheTarget(T), DL(DataLayoutString), TargetTriple(TT), TargetCPU(CPU),
       TargetFS(FS), AsmInfo(nullptr), MRI(nullptr), MII(nullptr), STI(nullptr),
       RequireStructuredCFG(false), O0WantsFastISel(false),
-      DefaultOptions(Options), Options(Options) {}
+      DefaultOptions(Options), Options(Options) {
+  MIRF = std::make_unique<MIRFormatter>();
+}
 
 TargetMachine::~TargetMachine() = default;
 
diff --git a/llvm/lib/Target/VE/CMakeLists.txt b/llvm/lib/Target/VE/CMakeLists.txt
new file mode 100644
index 0000000000000..a3eb8bae4ac4a
--- /dev/null
+++ b/llvm/lib/Target/VE/CMakeLists.txt
@@ -0,0 +1,8 @@
+set(LLVM_TARGET_DEFINITIONS VE.td)
+
+add_llvm_target(VECodeGen
+  VETargetMachine.cpp
+  )
+
+add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/llvm/lib/Target/VE/LLVMBuild.txt b/llvm/lib/Target/VE/LLVMBuild.txt
new file mode 100644
index 0000000000000..b45efd45c8aca
--- /dev/null
+++ b/llvm/lib/Target/VE/LLVMBuild.txt
@@ -0,0 +1,33 @@
+;===- ./lib/Target/VE/LLVMBuild.txt ----------------------------*- Conf -*--===;
+;
+; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+; See https://llvm.org/LICENSE.txt for license information.
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = MCTargetDesc TargetInfo + +[component_0] +type = TargetGroup +name = VE +parent = Target +has_asmparser = 0 +has_asmprinter = 0 + +[component_1] +type = Library +name = VECodeGen +parent = VE +required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG + VEDesc VEInfo Support Target +add_to_library_groups = VE diff --git a/llvm/lib/Target/VE/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/VE/MCTargetDesc/CMakeLists.txt new file mode 100644 index 0000000000000..fa2fefbe47f05 --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_library(LLVMVEDesc + VEMCTargetDesc.cpp + ) diff --git a/llvm/lib/Target/VE/MCTargetDesc/LLVMBuild.txt b/llvm/lib/Target/VE/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000000000..e585042e60bba --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/Target/VE/MCTargetDesc/LLVMBuild.txt ---------------*- Conf -*--===; +; +; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = VEDesc +parent = VE +required_libraries = MC VEInfo Support +add_to_library_groups = VE diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp new file mode 100644 index 0000000000000..7067f34a016f7 --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp @@ -0,0 +1,19 @@ +//===-- VEMCTargetDesc.cpp - VE Target Descriptions -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides VE specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "VEMCTargetDesc.h" + +using namespace llvm; + +extern "C" void LLVMInitializeVETargetMC() { + // TODO +} diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h new file mode 100644 index 0000000000000..a7969042606c0 --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h @@ -0,0 +1,27 @@ +//===-- VEMCTargetDesc.h - VE Target Descriptions ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides VE specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_MCTARGETDESC_VEMCTARGETDESC_H +#define LLVM_LIB_TARGET_VE_MCTARGETDESC_VEMCTARGETDESC_H + +#include "llvm/Support/DataTypes.h" + +#include + +namespace llvm { + +class Target; +Target &getTheVETarget(); + +} // end llvm namespace + +#endif diff --git a/llvm/lib/Target/VE/TargetInfo/CMakeLists.txt b/llvm/lib/Target/VE/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000000000..0850b0f27bf2a --- /dev/null +++ b/llvm/lib/Target/VE/TargetInfo/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_component_library(LLVMVEInfo + VETargetInfo.cpp + ) diff --git a/llvm/lib/Target/VE/TargetInfo/LLVMBuild.txt b/llvm/lib/Target/VE/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000000000..c440132476a4d --- /dev/null +++ b/llvm/lib/Target/VE/TargetInfo/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/Target/VE/TargetInfo/LLVMBuild.txt -----------------*- Conf -*--===; +; +; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = VEInfo +parent = VE +required_libraries = Support +add_to_library_groups = VE diff --git a/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp b/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp new file mode 100644 index 0000000000000..be68fe7d24291 --- /dev/null +++ b/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp @@ -0,0 +1,23 @@ +//===-- VETargetInfo.cpp - VE Target Implementation -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "VE.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +Target &llvm::getTheVETarget() { + static Target TheVETarget; + return TheVETarget; +} + +extern "C" void LLVMInitializeVETargetInfo() { + RegisterTarget X(getTheVETarget(), "ve", + "VE", "VE"); +} diff --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h new file mode 100644 index 0000000000000..51d3e701f8ec0 --- /dev/null +++ b/llvm/lib/Target/VE/VE.h @@ -0,0 +1,19 @@ +//===-- VE.h - Top-level interface for VE representation --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// VE back-end. 
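VETargetInfo.cpp above follows the standard registration pattern: a function-local static Target plus an LLVMInitializeVETargetInfo() hook that hands it to the registry. A minimal consumer sketch showing how the registered short name "ve" becomes visible to tools; it assumes an LLVM build with the VE target enabled and uses the TargetRegistry lookup overload that resolves architecture names:

  #include "llvm/ADT/Triple.h"
  #include "llvm/Support/TargetRegistry.h"
  #include "llvm/Support/TargetSelect.h"
  #include <string>

  int main() {
    llvm::InitializeAllTargetInfos(); // runs LLVMInitializeVETargetInfo()
    std::string Err;
    llvm::Triple TT;
    // Look up by the short name passed to RegisterTarget above: "ve".
    const llvm::Target *T = llvm::TargetRegistry::lookupTarget("ve", TT, Err);
    return T ? 0 : 1;
  }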
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_VE_H +#define LLVM_LIB_TARGET_VE_VE_H + +#include "MCTargetDesc/VEMCTargetDesc.h" + +#endif diff --git a/llvm/lib/Target/VE/VETargetMachine.cpp b/llvm/lib/Target/VE/VETargetMachine.cpp new file mode 100644 index 0000000000000..10fe9ba0e7ebc --- /dev/null +++ b/llvm/lib/Target/VE/VETargetMachine.cpp @@ -0,0 +1,62 @@ +//===-- VETargetMachine.cpp - Define TargetMachine for VE -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#include "VETargetMachine.h" +#include "VE.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +#define DEBUG_TYPE "ve" + +extern "C" void LLVMInitializeVETarget() { + // Register the target. + RegisterTargetMachine X(getTheVETarget()); +} + +static std::string computeDataLayout(const Triple &T) { + // Aurora VE is little endian + std::string Ret = "e"; + + // Use ELF mangling + Ret += "-m:e"; + + // Alignments for 64 bit integers. + Ret += "-i64:64"; + + // VE supports 32 bit and 64 bits integer on registers + Ret += "-n32:64"; + + // Stack alignment is 64 bits + Ret += "-S64"; + + return Ret; +} + +static Reloc::Model getEffectiveRelocModel(Optional RM) { + if (!RM.hasValue()) + return Reloc::Static; + return *RM; +} + +/// Create an Aurora VE architecture model +VETargetMachine::VETargetMachine( + const Target &T, const Triple &TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Optional RM, + Optional CM, CodeGenOpt::Level OL, bool JIT) + : LLVMTargetMachine( + T, computeDataLayout(TT), TT, CPU, FS, Options, + getEffectiveRelocModel(RM), + getEffectiveCodeModel(CM, CodeModel::Small), + OL) +{} + +VETargetMachine::~VETargetMachine() {} diff --git a/llvm/lib/Target/VE/VETargetMachine.h b/llvm/lib/Target/VE/VETargetMachine.h new file mode 100644 index 0000000000000..ac6089036ff8e --- /dev/null +++ b/llvm/lib/Target/VE/VETargetMachine.h @@ -0,0 +1,31 @@ +//===-- VETargetMachine.h - Define TargetMachine for VE ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the VE specific subclass of TargetMachine. 
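The string assembled by computeDataLayout() in VETargetMachine.cpp above is worth decoding once. A standalone check, independent of LLVM, that re-concatenates the documented pieces and confirms the final layout string:

  #include <cassert>
  #include <string>

  int main() {
    std::string DL = "e";  // little endian
    DL += "-m:e";          // ELF-style name mangling
    DL += "-i64:64";       // 64-bit integers are 64-bit aligned
    DL += "-n32:64";       // native integer widths: 32 and 64 bits
    DL += "-S64";          // natural stack alignment: 64 bits
    assert(DL == "e-m:e-i64:64-n32:64-S64");
    return 0;
  }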
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_VETARGETMACHINE_H +#define LLVM_LIB_TARGET_VE_VETARGETMACHINE_H + +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class VETargetMachine : public LLVMTargetMachine { +public: + VETargetMachine(const Target &T, const Triple &TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Optional RM, Optional CM, + CodeGenOpt::Level OL, bool JIT); + ~VETargetMachine() override; +}; + +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp index 0915a1532df9e..b1d2de29c8965 100644 --- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -702,6 +702,9 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { } Blocks.push_back(SuccMBB); + + // After this, EFLAGS will be recreated before each use. + SuccMBB->removeLiveIn(X86::EFLAGS); } } while (!Blocks.empty()); diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 13fcf6aa72472..39e2057b1b6ee 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -31,6 +31,7 @@ #include "llvm/IR/CFG.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -2824,90 +2825,12 @@ struct AAIsDeadFunction : public AAIsDead { bool MayReturn = !NoReturnAA.isAssumedNoReturn(); if (MayReturn && (!Invoke2CallAllowed || !isa(CB))) continue; - Instruction *I = const_cast(DeadEndI); - BasicBlock *BB = I->getParent(); - Instruction *SplitPos = I->getNextNode(); - // TODO: mark stuff before unreachable instructions as dead. - - if (auto *II = dyn_cast(I)) { - // If we keep the invoke the split position is at the beginning of the - // normal desitination block (it invokes a noreturn function after all). - BasicBlock *NormalDestBB = II->getNormalDest(); - SplitPos = &NormalDestBB->front(); - - /// Invoke is replaced with a call and unreachable is placed after it if - /// the callee is nounwind and noreturn. Otherwise, we keep the invoke - /// and only place an unreachable in the normal successor. - if (Invoke2CallAllowed) { - if (II->getCalledFunction()) { - const IRPosition &IPos = IRPosition::callsite_function(*II); - const auto &AANoUnw = A.getAAFor(*this, IPos); - if (AANoUnw.isAssumedNoUnwind()) { - LLVM_DEBUG(dbgs() - << "[AAIsDead] Replace invoke with call inst\n"); - CallInst *CI = createCallMatchingInvoke(II); - CI->insertBefore(II); - CI->takeName(II); - replaceAllInstructionUsesWith(*II, *CI); - - // If this is a nounwind + mayreturn invoke we only remove the - // unwind edge. This is done by moving the invoke into a new and - // dead block and connecting the normal destination of the invoke - // with a branch that follows the call replacement we created - // above. - if (MayReturn) { - BasicBlock *NewDeadBB = - SplitBlock(BB, II, nullptr, nullptr, nullptr, ".i2c"); - assert(isa(BB->getTerminator()) && - BB->getTerminator()->getNumSuccessors() == 1 && - BB->getTerminator()->getSuccessor(0) == NewDeadBB); - new UnreachableInst(I->getContext(), NewDeadBB); - BB->getTerminator()->setOperand(0, NormalDestBB); - A.deleteAfterManifest(*II); - continue; - } - - // We do not need an invoke (II) but instead want a call followed - // by an unreachable. 
However, we do not remove II as other - // abstract attributes might have it cached as part of their - // results. Given that we modify the CFG anyway, we simply keep II - // around but in a new dead block. To avoid II being live through - // a different edge we have to ensure the block we place it in is - // only reached from the current block of II and then not reached - // at all when we insert the unreachable. - SplitBlockPredecessors(NormalDestBB, {BB}, ".i2c"); - SplitPos = CI->getNextNode(); - } - } - } - - if (SplitPos == &NormalDestBB->front()) { - // If this is an invoke of a noreturn function the edge to the normal - // destination block is dead but not necessarily the block itself. - // TODO: We need to move to an edge based system during deduction and - // also manifest. - assert(!NormalDestBB->isLandingPad() && - "Expected the normal destination not to be a landingpad!"); - if (NormalDestBB->getUniquePredecessor() == BB) { - assumeLive(A, *NormalDestBB); - } else { - BasicBlock *SplitBB = - SplitBlockPredecessors(NormalDestBB, {BB}, ".dead"); - // The split block is live even if it contains only an unreachable - // instruction at the end. - assumeLive(A, *SplitBB); - SplitPos = SplitBB->getTerminator(); - HasChanged = ChangeStatus::CHANGED; - } - } - } - if (isa_and_nonnull(SplitPos)) - continue; - - BB = SplitPos->getParent(); - SplitBlock(BB, SplitPos); - A.changeToUnreachableAfterManifest(BB->getTerminator()); + if (auto *II = dyn_cast(DeadEndI)) + A.registerInvokeWithDeadSuccessor(const_cast(*II)); + else + A.changeToUnreachableAfterManifest( + const_cast(DeadEndI->getNextNode())); HasChanged = ChangeStatus::CHANGED; } @@ -5668,8 +5591,35 @@ ChangeStatus Attributor::run(Module &M) { } } } - for (Instruction *I : ToBeChangedToUnreachableInsts) - changeToUnreachable(I, /* UseLLVMTrap */ false); + for (auto &V : InvokeWithDeadSuccessor) + if (InvokeInst *II = dyn_cast_or_null(V)) { + bool UnwindBBIsDead = II->hasFnAttr(Attribute::NoUnwind); + bool NormalBBIsDead = II->hasFnAttr(Attribute::NoReturn); + bool Invoke2CallAllowed = + !AAIsDeadFunction::mayCatchAsynchronousExceptions( + *II->getFunction()); + assert((UnwindBBIsDead || NormalBBIsDead) && + "Invoke does not have dead successors!"); + BasicBlock *BB = II->getParent(); + BasicBlock *NormalDestBB = II->getNormalDest(); + if (UnwindBBIsDead) { + Instruction *NormalNextIP = &NormalDestBB->front(); + if (Invoke2CallAllowed) { + changeToCall(II); + NormalNextIP = BB->getTerminator(); + } + if (NormalBBIsDead) + ToBeChangedToUnreachableInsts.insert(NormalNextIP); + } else { + assert(NormalBBIsDead && "Broken invariant!"); + if (!NormalDestBB->getUniquePredecessor()) + NormalDestBB = SplitBlockPredecessors(NormalDestBB, {BB}, ".dead"); + ToBeChangedToUnreachableInsts.insert(&NormalDestBB->front()); + } + } + for (auto &V : ToBeChangedToUnreachableInsts) + if (Instruction *I = dyn_cast_or_null(V)) + changeToUnreachable(I, /* UseLLVMTrap */ false); for (Instruction *I : TerminatorsToFold) ConstantFoldTerminator(I->getParent()); @@ -6336,7 +6286,9 @@ static bool runAttributorOnModule(Module &M, AnalysisGetter &AG) { A.identifyDefaultAbstractAttributes(F); } - return A.run(M) == ChangeStatus::CHANGED; + bool Changed = A.run(M) == ChangeStatus::CHANGED; + assert(!verifyModule(M, &errs()) && "Module verification failed!"); + return Changed; } PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index f7b39d98d4923..2774e46151faf 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1239,6 +1239,14 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { Value *YZ = Builder.CreateFMulFMF(Y, Op0, &I); return BinaryOperator::CreateFDivFMF(YZ, X, &I); } + // Z / (1.0 / Y) => (Y * Z) + // + // This is a special case of Z / (X / Y) => (Y * Z) / X, with X = 1.0. The + // m_OneUse check is avoided because even in the case of the multiple uses + // for 1.0/Y, the number of instructions remain the same and a division is + // replaced by a multiplication. + if (match(Op1, m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) + return BinaryOperator::CreateFMulFMF(Y, Op0, &I); } if (I.hasAllowReassoc() && Op0->hasOneUse() && Op1->hasOneUse()) { diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp index 558f63113db63..92ad8dafa5abc 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -427,51 +427,76 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, return UnrollResult; } +static bool tryToUnrollAndJamLoop(Function &F, DominatorTree &DT, LoopInfo &LI, + ScalarEvolution &SE, + const TargetTransformInfo &TTI, + AssumptionCache &AC, DependenceInfo &DI, + OptimizationRemarkEmitter &ORE, + int OptLevel) { + bool DidSomething = false; + + // The loop unroll and jam pass requires loops to be in simplified form, and also needs LCSSA. + // Since simplification may add new inner loops, it has to run before the + // legality and profitability checks. This means running the loop unroll and jam pass + // will simplify all loops, regardless of whether anything end up being + // unroll and jammed. + for (auto &L : LI) { + DidSomething |= + simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */); + DidSomething |= formLCSSARecursively(*L, DT, &LI, &SE); + } + + SmallPriorityWorklist Worklist; + internal::appendLoopsToWorklist(reverse(LI), Worklist); + while (!Worklist.empty()) { + Loop *L = Worklist.pop_back_val(); + formLCSSA(*L, DT, &LI, &SE); + LoopUnrollResult Result = + tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel); + if (Result != LoopUnrollResult::Unmodified) + DidSomething = true; + } + + return DidSomething; +} + namespace { -class LoopUnrollAndJam : public LoopPass { +class LoopUnrollAndJam : public FunctionPass { public: static char ID; // Pass ID, replacement for typeid unsigned OptLevel; - LoopUnrollAndJam(int OptLevel = 2) : LoopPass(ID), OptLevel(OptLevel) { + LoopUnrollAndJam(int OptLevel = 2) : FunctionPass(ID), OptLevel(OptLevel) { initializeLoopUnrollAndJamPass(*PassRegistry::getPassRegistry()); } - bool runOnLoop(Loop *L, LPPassManager &LPM) override { - if (skipLoop(L)) + bool runOnFunction(Function &F) override { + if (skipFunction(F)) return false; - Function &F = *L->getHeader()->getParent(); - auto &DT = getAnalysis().getDomTree(); - LoopInfo *LI = &getAnalysis().getLoopInfo(); + LoopInfo &LI = getAnalysis().getLoopInfo(); ScalarEvolution &SE = getAnalysis().getSE(); const TargetTransformInfo &TTI = getAnalysis().getTTI(F); auto &AC = getAnalysis().getAssumptionCache(F); auto &DI = getAnalysis().getDI(); - // For the old PM, we can't use OptimizationRemarkEmitter as an analysis - // pass. 
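The InstCombine hunk above rewrites Z / (1.0 / Y) into Y * Z, trading two divisions for one multiplication. A quick numeric sanity check of the identity; note it is only value-preserving up to rounding in general, which is why the surrounding code guards the transform with reassociation fast-math flags:

  #include <cassert>
  #include <cmath>

  int main() {
    double Y = 8.0, Z = 3.0;
    double Before = Z / (1.0 / Y); // two divisions
    double After = Y * Z;          // one multiplication
    // Exact here because 1.0/8.0 is representable; in general the two
    // expressions differ by rounding error, hence the fast-math guard.
    assert(std::fabs(Before - After) < 1e-12);
    return 0;
  }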
Function analyses need to be preserved across loop transformations - // but ORE cannot be preserved (see comment before the pass definition). - OptimizationRemarkEmitter ORE(&F); - - LoopUnrollResult Result = - tryToUnrollAndJamLoop(L, DT, LI, SE, TTI, AC, DI, ORE, OptLevel); + auto &ORE = getAnalysis().getORE(); - if (Result == LoopUnrollResult::FullyUnrolled) - LPM.markLoopAsDeleted(*L); - - return Result != LoopUnrollResult::Unmodified; + return tryToUnrollAndJamLoop(F, DT, LI, SE, TTI, AC, DI, ORE, OptLevel); } /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG... void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addRequired(); - getLoopAnalysisUsage(AU); + AU.addRequired(); } }; @@ -481,10 +506,13 @@ char LoopUnrollAndJam::ID = 0; INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam", "Unroll and Jam loops", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(LoopPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(LoopUnrollAndJam, "loop-unroll-and-jam", "Unroll and Jam loops", false, false) @@ -492,26 +520,18 @@ Pass *llvm::createLoopUnrollAndJamPass(int OptLevel) { return new LoopUnrollAndJam(OptLevel); } -PreservedAnalyses LoopUnrollAndJamPass::run(Loop &L, LoopAnalysisManager &AM, - LoopStandardAnalysisResults &AR, - LPMUpdater &) { - const auto &FAM = - AM.getResult(L, AR).getManager(); - Function *F = L.getHeader()->getParent(); - - auto *ORE = FAM.getCachedResult(*F); - // FIXME: This should probably be optional rather than required. - if (!ORE) - report_fatal_error( - "LoopUnrollAndJamPass: OptimizationRemarkEmitterAnalysis not cached at " - "a higher level"); - - DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI); - - LoopUnrollResult Result = tryToUnrollAndJamLoop( - &L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, DI, *ORE, OptLevel); - - if (Result == LoopUnrollResult::Unmodified) +PreservedAnalyses LoopUnrollAndJamPass::run(Function &F, + FunctionAnalysisManager &AM) { + ScalarEvolution &SE = AM.getResult(F); + LoopInfo &LI = AM.getResult(F); + TargetTransformInfo &TTI = AM.getResult(F); + AssumptionCache &AC = AM.getResult(F); + DominatorTree &DT = AM.getResult(F); + DependenceInfo &DI = AM.getResult(F); + OptimizationRemarkEmitter &ORE = + AM.getResult(F); + + if (!tryToUnrollAndJamLoop(F, DT, LI, SE, TTI, AC, DI, ORE, OptLevel)) return PreservedAnalyses::all(); return getLoopPassPreservedAnalyses(); diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index a9566422a8324..0ff6ee8bcfcc2 100644 --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -10,9 +10,6 @@ // // TODO: // * Implement multiply & add fusion -// * Implement shape propagation -// * Implement optimizations to reduce or eliminateshufflevector uses by using -// shape information. // * Add remark, summarizing the available matrix optimization opportunities. 
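Backing up to the LoopUnrollAndJamPass conversion above: the new function-level driver has one subtlety worth isolating. Loop simplification runs over every loop before any legality check, so the pass must report "changed" even when nothing ends up being unroll-and-jammed. A toy model of that control flow; LoopLike, simplify, and tryTransform are stand-ins, not LLVM API:

  #include <vector>

  struct LoopLike {
    bool NeedsSimplify = true;
    bool Profitable = false;
  };

  static bool simplify(LoopLike &L) {
    bool Changed = L.NeedsSimplify;
    L.NeedsSimplify = false;
    return Changed;
  }

  static bool tryTransform(const LoopLike &L) { return L.Profitable; }

  static bool runOnAllLoops(std::vector<LoopLike> &Loops) {
    bool DidSomething = false;
    // Simplify everything up front, as the real pass does with
    // simplifyLoop()/formLCSSARecursively(), and record any change.
    for (LoopLike &L : Loops)
      DidSomething |= simplify(L);
    // Then attempt the transform on each loop in the worklist.
    for (LoopLike &L : Loops)
      DidSomething |= tryTransform(L);
    return DidSomething;
  }

  int main() {
    std::vector<LoopLike> Loops(2);
    return runOnAllLoops(Loops) ? 0 : 1; // "changed" with no unrolling
  }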
// //===----------------------------------------------------------------------===// @@ -95,20 +92,20 @@ Value *computeColumnAddr(Value *BasePtr, Value *Col, Value *Stride, unsigned AS = cast(BasePtr->getType())->getAddressSpace(); // Compute the start of the column with index Col as Col * Stride. - Value *ColumnStart = Builder.CreateMul(Col, Stride); + Value *ColumnStart = Builder.CreateMul(Col, Stride, "col.start"); // Get pointer to the start of the selected column. Skip GEP creation, // if we select column 0. if (isa(ColumnStart) && cast(ColumnStart)->isZero()) ColumnStart = BasePtr; else - ColumnStart = Builder.CreateGEP(EltType, BasePtr, ColumnStart); + ColumnStart = Builder.CreateGEP(EltType, BasePtr, ColumnStart, "col.gep"); // Cast elementwise column start pointer to a pointer to a column // (EltType x NumRows)*. Type *ColumnType = VectorType::get(EltType, NumRows); Type *ColumnPtrType = PointerType::get(ColumnType, AS); - return Builder.CreatePointerCast(ColumnStart, ColumnPtrType); + return Builder.CreatePointerCast(ColumnStart, ColumnPtrType, "col.cast"); } /// LowerMatrixIntrinsics contains the methods used to lower matrix intrinsics. @@ -317,36 +314,16 @@ class LowerMatrixIntrinsics { default: return false; } - return isUniformShape(V) || isa(V); + return isUniformShape(V) || isa(V) || isa(V); } /// Propagate the shape information of instructions to their users. - void propagateShapeForward() { - // The work list contains instructions for which we can compute the shape, - // either based on the information provided by matrix intrinsics or known - // shapes of operands. - SmallVector WorkList; - - // Initialize the work list with ops carrying shape information. Initially - // only the shape of matrix intrinsics is known. - for (BasicBlock &BB : Func) - for (Instruction &Inst : BB) { - IntrinsicInst *II = dyn_cast(&Inst); - if (!II) - continue; - - switch (II->getIntrinsicID()) { - case Intrinsic::matrix_multiply: - case Intrinsic::matrix_transpose: - case Intrinsic::matrix_columnwise_load: - case Intrinsic::matrix_columnwise_store: - WorkList.push_back(&Inst); - break; - default: - break; - } - } - + /// The work list contains instructions for which we can compute the shape, + /// either based on the information provided by matrix intrinsics or known + /// shapes of operands. + SmallVector + propagateShapeForward(SmallVectorImpl &WorkList) { + SmallVector NewWorkList; // Pop an element for which we guaranteed to have at least one of the // operand shapes. Add the shape for this and then add users to the work // list. @@ -395,16 +372,120 @@ class LowerMatrixIntrinsics { } } - if (Propagate) + if (Propagate) { + NewWorkList.push_back(Inst); for (auto *User : Inst->users()) if (ShapeMap.count(User) == 0) WorkList.push_back(cast(User)); + } } + + return NewWorkList; + } + + /// Propagate the shape to operands of instructions with shape information. + /// \p Worklist contains the instruction for which we already know the shape. + SmallVector + propagateShapeBackward(SmallVectorImpl &WorkList) { + SmallVector NewWorkList; + + auto pushInstruction = [](Value *V, + SmallVectorImpl &WorkList) { + Instruction *I = dyn_cast(V); + if (I) + WorkList.push_back(I); + }; + // Pop an element with known shape. Traverse the operands, if their shape + // derives from the result shape and is unknown, add it and add them to the + // worklist. 
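The worklist discipline described in the comment above is the classic fixed-point scheme: record a shape only when it is new, and let that "newness" both seed further propagation and guarantee termination. A small self-contained model of the uniform-shape case, with `Value` and `operandsOf` as stand-ins for the pass's IR types:

  #include <map>
  #include <vector>

  using Value = int;
  struct Shape { unsigned Rows = 0, Cols = 0; };

  static std::map<Value, Shape> ShapeMap;

  // Stand-in for walking an instruction's operands.
  static std::vector<Value> operandsOf(Value) { return {}; }

  // Returns true only when V's shape was previously unknown; that is what
  // guarantees each value enters the worklist at most once.
  static bool setShape(Value V, Shape S) {
    return ShapeMap.emplace(V, S).second;
  }

  static void propagateBackward(std::vector<Value> WorkList) {
    while (!WorkList.empty()) {
      Value V = WorkList.back();
      WorkList.pop_back();
      // Uniform-shape case: every operand must match the result's shape.
      for (Value Op : operandsOf(V))
        if (setShape(Op, ShapeMap[V]))
          WorkList.push_back(Op);
    }
  }

  int main() {
    setShape(0, {4, 4});
    propagateBackward({0});
    return 0;
  }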
+ LLVM_DEBUG(dbgs() << "Backward-propagate shapes:\n"); + while (!WorkList.empty()) { + Value *V = WorkList.back(); + WorkList.pop_back(); + + size_t BeforeProcessingV = WorkList.size(); + if (!isa(V)) + continue; + + Value *MatrixA; + Value *MatrixB; + Value *M; + Value *N; + Value *K; + if (match(V, m_Intrinsic( + m_Value(MatrixA), m_Value(MatrixB), m_Value(M), + m_Value(N), m_Value(K)))) { + if (setShapeInfo(MatrixA, {M, N})) + pushInstruction(MatrixA, WorkList); + + if (setShapeInfo(MatrixB, {N, K})) + pushInstruction(MatrixB, WorkList); + + } else if (match(V, m_Intrinsic( + m_Value(MatrixA), m_Value(M), m_Value(N)))) { + // Flip dimensions. + if (setShapeInfo(MatrixA, {M, N})) + pushInstruction(MatrixA, WorkList); + } else if (match(V, m_Intrinsic( + m_Value(MatrixA), m_Value(), m_Value(), + m_Value(M), m_Value(N)))) { + if (setShapeInfo(MatrixA, {M, N})) { + pushInstruction(MatrixA, WorkList); + } + } else if (isa(V) || + match(V, m_Intrinsic())) { + // Nothing to do, no matrix input. + } else if (isa(V)) { + // Nothing to do. We forward-propagated to this so we would just + // backward propagate to an instruction with an already known shape. + } else if (isUniformShape(V)) { + // Propagate to all operands. + ShapeInfo Shape = ShapeMap[V]; + for (Use &U : cast(V)->operands()) { + if (setShapeInfo(U.get(), Shape)) + pushInstruction(U.get(), WorkList); + } + } + // After we discovered new shape info for new instructions in the + // worklist, we use their users as seeds for the next round of forward + // propagation. + for (size_t I = BeforeProcessingV; I != WorkList.size(); I++) + for (User *U : WorkList[I]->users()) + if (isa(U) && V != U) + NewWorkList.push_back(cast(U)); + } + return NewWorkList; } bool Visit() { - if (EnableShapePropagation) - propagateShapeForward(); + if (EnableShapePropagation) { + SmallVector WorkList; + + // Initially only the shape of matrix intrinsics is known. + // Initialize the work list with ops carrying shape information. + for (BasicBlock &BB : Func) + for (Instruction &Inst : BB) { + IntrinsicInst *II = dyn_cast(&Inst); + if (!II) + continue; + + switch (II->getIntrinsicID()) { + case Intrinsic::matrix_multiply: + case Intrinsic::matrix_transpose: + case Intrinsic::matrix_columnwise_load: + case Intrinsic::matrix_columnwise_store: + WorkList.push_back(&Inst); + break; + default: + break; + } + } + // Propagate shapes until nothing changes any longer. + while (!WorkList.empty()) { + WorkList = propagateShapeForward(WorkList); + WorkList = propagateShapeBackward(WorkList); + } + } ReversePostOrderTraversal RPOT(&Func); bool Changed = false; @@ -419,6 +500,8 @@ class LowerMatrixIntrinsics { Value *Op2; if (auto *BinOp = dyn_cast(&Inst)) Changed |= VisitBinaryOperator(BinOp); + if (match(&Inst, m_Load(m_Value(Op1)))) + Changed |= VisitLoad(&Inst, Op1, Builder); else if (match(&Inst, m_Store(m_Value(Op1), m_Value(Op2)))) Changed |= VisitStore(&Inst, Op1, Op2, Builder); } @@ -433,7 +516,7 @@ class LowerMatrixIntrinsics { LoadInst *createColumnLoad(Value *ColumnPtr, Type *EltType, IRBuilder<> Builder) { unsigned Align = DL.getABITypeAlignment(EltType); - return Builder.CreateAlignedLoad(ColumnPtr, Align); + return Builder.CreateAlignedLoad(ColumnPtr, Align, "col.load"); } StoreInst *createColumnStore(Value *ColumnValue, Value *ColumnPtr, @@ -474,17 +557,11 @@ class LowerMatrixIntrinsics { return true; } - /// Lowers llvm.matrix.columnwise.load. - /// - /// The intrinsic loads a matrix from memory using a stride between columns. 
- void LowerColumnwiseLoad(CallInst *Inst) { + void LowerLoad(Instruction *Inst, Value *Ptr, Value *Stride, + ShapeInfo Shape) { IRBuilder<> Builder(Inst); - Value *Ptr = Inst->getArgOperand(0); - Value *Stride = Inst->getArgOperand(1); auto VType = cast(Inst->getType()); Value *EltPtr = createElementPtr(Ptr, VType->getElementType(), Builder); - ShapeInfo Shape(Inst->getArgOperand(2), Inst->getArgOperand(3)); - ColumnMatrixTy Result; // Distance between start of one column and the start of the next for (unsigned C = 0, E = Shape.NumColumns; C < E; ++C) { @@ -498,6 +575,16 @@ class LowerMatrixIntrinsics { finalizeLowering(Inst, Result, Builder); } + /// Lowers llvm.matrix.columnwise.load. + /// + /// The intrinsic loads a matrix from memory using a stride between columns. + void LowerColumnwiseLoad(CallInst *Inst) { + Value *Ptr = Inst->getArgOperand(0); + Value *Stride = Inst->getArgOperand(1); + LowerLoad(Inst, Ptr, Stride, + {Inst->getArgOperand(2), Inst->getArgOperand(3)}); + } + void LowerStore(Instruction *Inst, Value *Matrix, Value *Ptr, Value *Stride, ShapeInfo Shape) { IRBuilder<> Builder(Inst); @@ -693,6 +780,16 @@ class LowerMatrixIntrinsics { finalizeLowering(Inst, Result, Builder); } + /// Lower load instructions, if shape information is available. + bool VisitLoad(Instruction *Inst, Value *Ptr, IRBuilder<> &Builder) { + auto I = ShapeMap.find(Inst); + if (I == ShapeMap.end()) + return false; + + LowerLoad(Inst, Ptr, Builder.getInt32(I->second.NumRows), I->second); + return true; + } + bool VisitStore(Instruction *Inst, Value *StoredVal, Value *Ptr, IRBuilder<> &Builder) { auto I = ShapeMap.find(StoredVal); diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index b7dd3d75e4580..c4c40189fda46 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -714,19 +714,19 @@ Optional llvm::getLoopEstimatedTripCount(Loop *L) { // To estimate the number of times the loop body was executed, we want to // know the number of times the backedge was taken, vs. the number of times // we exited the loop. - uint64_t TrueVal, FalseVal; - if (!LatchBR->extractProfMetadata(TrueVal, FalseVal)) + uint64_t BackedgeTakenWeight, LatchExitWeight; + if (!LatchBR->extractProfMetadata(BackedgeTakenWeight, LatchExitWeight)) return None; - if (!TrueVal || !FalseVal) + if (LatchBR->getSuccessor(0) != L->getHeader()) + std::swap(BackedgeTakenWeight, LatchExitWeight); + + if (!BackedgeTakenWeight || !LatchExitWeight) return 0; // Divide the count of the backedge by the count of the edge exiting the loop, // rounding to nearest. 
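The rounding described in the comment above is what llvm::divideNearest (MathExtras.h) implements: add half the divisor before dividing. A standalone check that the formula matches the two hand-rolled expressions being deleted below:

  #include <cassert>
  #include <cstdint>

  // Same formula as llvm::divideNearest: round N/D to the nearest integer.
  static uint64_t divideNearest(uint64_t N, uint64_t D) {
    return (N + D / 2) / D;
  }

  int main() {
    // e.g. backedge weight 127, latch-exit weight 2:
    // (127 + 1) / 2 == 64, so the body ran about 64 times per loop entry.
    assert(divideNearest(127, 2) == 64);
    assert(divideNearest(10, 4) == 3); // 2.5 rounds up to 3
    return 0;
  }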
- if (LatchBR->getSuccessor(0) == L->getHeader()) - return (TrueVal + (FalseVal / 2)) / FalseVal; - else - return (FalseVal + (TrueVal / 2)) / TrueVal; + return llvm::divideNearest(BackedgeTakenWeight, LatchExitWeight); } bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index fd30d52a562a2..0400e44dd0ecf 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7502,30 +7502,43 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues); } -static ScalarEpilogueLowering -getScalarEpilogueLowering(Function *F, Loop *L, LoopVectorizeHints &Hints, - ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, - TargetTransformInfo *TTI, TargetLibraryInfo *TLI, - AssumptionCache *AC, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree *DT, - const LoopAccessInfo *LAI) { - ScalarEpilogueLowering SEL = CM_ScalarEpilogueAllowed; +// Determine how to lower the scalar epilogue, which depends on 1) optimising +// for minimum code-size, 2) predicate compiler options, 3) loop hints forcing +// predication, and 4) a TTI hook that analyses whether the loop is suitable +// for predication. +static ScalarEpilogueLowering getScalarEpilogueLowering( + Function *F, Loop *L, LoopVectorizeHints &Hints, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, + AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, + LoopVectorizationLegality &LVL) { + bool OptSize = + F->hasOptSize() || llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, + PGSOQueryType::IRPass); + // 1) OptSize takes precedence over all other options, i.e. if this is set, + // don't look at hints or options, and don't request a scalar epilogue. + if (OptSize && Hints.getForce() != LoopVectorizeHints::FK_Enabled) + return CM_ScalarEpilogueNotAllowedOptSize; + bool PredicateOptDisabled = PreferPredicateOverEpilog.getNumOccurrences() && !PreferPredicateOverEpilog; - if (Hints.getForce() != LoopVectorizeHints::FK_Enabled && - (F->hasOptSize() || - llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, - PGSOQueryType::IRPass))) - SEL = CM_ScalarEpilogueNotAllowedOptSize; - else if (PreferPredicateOverEpilog || - Hints.getPredicate() == LoopVectorizeHints::FK_Enabled || - (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, LAI) && - Hints.getPredicate() != LoopVectorizeHints::FK_Disabled && - !PredicateOptDisabled)) - SEL = CM_ScalarEpilogueNotNeededUsePredicate; + // 2) Next, if disabling predication is requested on the command line, honour + // this and request a scalar epilogue. Also do this if we don't have a + // primary induction variable, which is required for predication. + if (PredicateOptDisabled || !LVL.getPrimaryInduction()) + return CM_ScalarEpilogueAllowed; + + // 3) and 4) Check whether predication is requested on the command line or + // with a loop hint, or whether the TTI hook indicates it is profitable; if + // so, request predication.
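The numbered comments above encode a strict precedence. A restatement of that order as plain code; the enum values mirror the pass, while the bool parameters collapse the option/hint/TTI plumbing and are purely illustrative:

  #include <cassert>

  enum ScalarEpilogueLowering {
    CM_ScalarEpilogueAllowed,
    CM_ScalarEpilogueNotAllowedOptSize,
    CM_ScalarEpilogueNotNeededUsePredicate
  };

  static ScalarEpilogueLowering
  decideEpilogue(bool OptSize, bool ForceVectorize, bool PredicateOptDisabled,
                 bool HasPrimaryInduction, bool PredicationRequested) {
    // 1) Code size wins unless vectorization is explicitly forced.
    if (OptSize && !ForceVectorize)
      return CM_ScalarEpilogueNotAllowedOptSize;
    // 2) Predication disabled on the command line, or no primary induction
    //    variable (predication needs one): keep the scalar epilogue.
    if (PredicateOptDisabled || !HasPrimaryInduction)
      return CM_ScalarEpilogueAllowed;
    // 3)+4) Option, hint, or TTI preference selects tail folding instead.
    if (PredicationRequested)
      return CM_ScalarEpilogueNotNeededUsePredicate;
    return CM_ScalarEpilogueAllowed;
  }

  int main() {
    assert(decideEpilogue(true, false, false, true, true) ==
           CM_ScalarEpilogueNotAllowedOptSize);
    assert(decideEpilogue(false, false, true, true, true) ==
           CM_ScalarEpilogueAllowed);
    return 0;
  }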
+ if (PreferPredicateOverEpilog || + Hints.getPredicate() == LoopVectorizeHints::FK_Enabled || + (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, + LVL.getLAI()) && + Hints.getPredicate() != LoopVectorizeHints::FK_Disabled)) + return CM_ScalarEpilogueNotNeededUsePredicate; - return SEL; + return CM_ScalarEpilogueAllowed; } // Process the loop in the VPlan-native vectorization path. This path builds @@ -7543,9 +7556,8 @@ static bool processLoopInVPlanNativePath( Function *F = L->getHeader()->getParent(); InterleavedAccessInfo IAI(PSE, L, DT, LI, LVL->getLAI()); - ScalarEpilogueLowering SEL = - getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, - PSE.getSE(), DT, LVL->getLAI()); + ScalarEpilogueLowering SEL = getScalarEpilogueLowering( + F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, PSE.getSE(), DT, *LVL); LoopVectorizationCostModel CM(SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F, &Hints, IAI); @@ -7637,9 +7649,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Check the function attributes and profiles to find out if this function // should be optimized for size. - ScalarEpilogueLowering SEL = - getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, - PSE.getSE(), DT, LVL.getLAI()); + ScalarEpilogueLowering SEL = getScalarEpilogueLowering( + F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, PSE.getSE(), DT, LVL); // Entrance to the VPlan-native vectorization path. Outer loops are processed // here. They may require CFG and instruction level transformations before diff --git a/llvm/test/Analysis/ScalarEvolution/range_nw_flag.ll b/llvm/test/Analysis/ScalarEvolution/range_nw_flag.ll index 65ecd312e0cbb..4a520056d7657 100644 --- a/llvm/test/Analysis/ScalarEvolution/range_nw_flag.ll +++ b/llvm/test/Analysis/ScalarEvolution/range_nw_flag.ll @@ -39,3 +39,22 @@ exit: ret void } +; CHECK-LABEL: @test-addrec-nsw +; CHECK: --> {(-1 + (-10 smin %offset)),+,-1}<%loop> U: [-2147483648,1) S: [-2147483648,1) +define void @test-addrec-nsw(float* %input, i32 %offset, i32 %numIterations) { +entry: + %cmp = icmp slt i32 %offset, -10 + %max = select i1 %cmp, i32 %offset, i32 -10 + br label %loop +loop: + %i = phi i32 [ %nexti, %loop ], [ 0, %entry ] + %nexti = add nsw i32 %i, -1 + %index32 = add nsw i32 %nexti, %max + %ptr = getelementptr inbounds float, float* %input, i32 %index32 + %f = load float, float* %ptr, align 4 + %exitcond = icmp eq i32 %nexti, %numIterations + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/CodeGen/AArch64/settag-merge.ll b/llvm/test/CodeGen/AArch64/settag-merge.ll deleted file mode 100644 index 1bc93a82070f0..0000000000000 --- a/llvm/test/CodeGen/AArch64/settag-merge.ll +++ /dev/null @@ -1,214 +0,0 @@ -; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s - -declare void @use(i8* %p) -declare void @llvm.aarch64.settag(i8* %p, i64 %a) -declare void @llvm.aarch64.settag.zero(i8* %p, i64 %a) - -define void @stg16_16() { -entry: -; CHECK-LABEL: stg16_16: -; CHECK: st2g sp, [sp], #32 -; CHECK: ret - %a = alloca i8, i32 16, align 16 - %b = alloca i8, i32 16, align 16 - call void @llvm.aarch64.settag(i8* %a, i64 16) - call void @llvm.aarch64.settag(i8* %b, i64 16) - ret void -} - -define i32 @stg16_16_16_16_ret() { -entry: -; CHECK-LABEL: stg16_16_16_16_ret: -; CHECK: st2g sp, [sp, #32] -; CHECK: st2g sp, [sp], #64 -; CHECK: mov w0, wzr -; CHECK: ret - %a = alloca i8, i32 16, align 16 - %b = alloca i8, i32 16, align 16 - %c = alloca i8, i32 16, align 16 - %d = alloca i8, i32 16, align 16 - call void 
@llvm.aarch64.settag(i8* %a, i64 16) - call void @llvm.aarch64.settag(i8* %b, i64 16) - call void @llvm.aarch64.settag(i8* %c, i64 16) - call void @llvm.aarch64.settag(i8* %d, i64 16) - ret i32 0 -} - -define void @stg16_16_16_16() { -entry: -; CHECK-LABEL: stg16_16_16_16: -; CHECK: st2g sp, [sp, #32] -; CHECK: st2g sp, [sp], #64 -; CHECK: ret - %a = alloca i8, i32 16, align 16 - %b = alloca i8, i32 16, align 16 - %c = alloca i8, i32 16, align 16 - %d = alloca i8, i32 16, align 16 - call void @llvm.aarch64.settag(i8* %a, i64 16) - call void @llvm.aarch64.settag(i8* %b, i64 16) - call void @llvm.aarch64.settag(i8* %c, i64 16) - call void @llvm.aarch64.settag(i8* %d, i64 16) - ret void -} - -define void @stg128_128_128_128() { -entry: -; CHECK-LABEL: stg128_128_128_128: -; CHECK: mov x8, #512 -; CHECK: st2g sp, [sp], #32 -; CHECK: sub x8, x8, #32 -; CHECK: cbnz x8, -; CHECK: ret - %a = alloca i8, i32 128, align 16 - %b = alloca i8, i32 128, align 16 - %c = alloca i8, i32 128, align 16 - %d = alloca i8, i32 128, align 16 - call void @llvm.aarch64.settag(i8* %a, i64 128) - call void @llvm.aarch64.settag(i8* %b, i64 128) - call void @llvm.aarch64.settag(i8* %c, i64 128) - call void @llvm.aarch64.settag(i8* %d, i64 128) - ret void -} - -define void @stg16_512_16() { -entry: -; CHECK-LABEL: stg16_512_16: -; CHECK: mov x8, #544 -; CHECK: st2g sp, [sp], #32 -; CHECK: sub x8, x8, #32 -; CHECK: cbnz x8, -; CHECK: ret - %a = alloca i8, i32 16, align 16 - %b = alloca i8, i32 512, align 16 - %c = alloca i8, i32 16, align 16 - call void @llvm.aarch64.settag(i8* %a, i64 16) - call void @llvm.aarch64.settag(i8* %b, i64 512) - call void @llvm.aarch64.settag(i8* %c, i64 16) - ret void -} - -define void @stg512_512_512() { -entry: -; CHECK-LABEL: stg512_512_512: -; CHECK: mov x8, #1536 -; CHECK: st2g sp, [sp], #32 -; CHECK: sub x8, x8, #32 -; CHECK: cbnz x8, -; CHECK: ret - %a = alloca i8, i32 512, align 16 - %b = alloca i8, i32 512, align 16 - %c = alloca i8, i32 512, align 16 - call void @llvm.aarch64.settag(i8* %a, i64 512) - call void @llvm.aarch64.settag(i8* %b, i64 512) - call void @llvm.aarch64.settag(i8* %c, i64 512) - ret void -} - -define void @early(i1 %flag) { -entry: -; CHECK-LABEL: early: -; CHECK: tbz w0, #0, [[LABEL:.LBB.*]] -; CHECK: st2g sp, [sp, # -; CHECK: st2g sp, [sp, # -; CHECK: st2g sp, [sp, # -; CHECK: [[LABEL]]: -; CHECK: stg sp, [sp, # -; CHECK: st2g sp, [sp], # -; CHECK: ret - %a = alloca i8, i32 48, align 16 - %b = alloca i8, i32 48, align 16 - %c = alloca i8, i32 48, align 16 - br i1 %flag, label %if.then, label %if.end - -if.then: - call void @llvm.aarch64.settag(i8* %a, i64 48) - call void @llvm.aarch64.settag(i8* %b, i64 48) - br label %if.end - -if.end: - call void @llvm.aarch64.settag(i8* %c, i64 48) - ret void -} - -define void @early_128_128(i1 %flag) { -entry: -; CHECK-LABEL: early_128_128: -; CHECK: tbz w0, #0, [[LABEL:.LBB.*]] -; CHECK: add x9, sp, # -; CHECK: mov x8, #256 -; CHECK: st2g x9, [x9], #32 -; CHECK: sub x8, x8, #32 -; CHECK: cbnz x8, -; CHECK: [[LABEL]]: -; CHECK: stg sp, [sp, # -; CHECK: st2g sp, [sp], # -; CHECK: ret - %a = alloca i8, i32 128, align 16 - %b = alloca i8, i32 128, align 16 - %c = alloca i8, i32 48, align 16 - br i1 %flag, label %if.then, label %if.end - -if.then: - call void @llvm.aarch64.settag(i8* %a, i64 128) - call void @llvm.aarch64.settag(i8* %b, i64 128) - br label %if.end - -if.end: - call void @llvm.aarch64.settag(i8* %c, i64 48) - ret void -} - -define void @early_512_512(i1 %flag) { -entry: -; CHECK-LABEL: early_512_512: -; CHECK: 
tbz w0, #0, [[LABEL:.LBB.*]] -; CHECK: add x9, sp, # -; CHECK: mov x8, #1024 -; CHECK: st2g x9, [x9], #32 -; CHECK: sub x8, x8, #32 -; CHECK: cbnz x8, -; CHECK: [[LABEL]]: -; CHECK: stg sp, [sp, # -; CHECK: st2g sp, [sp], # -; CHECK: ret - %a = alloca i8, i32 512, align 16 - %b = alloca i8, i32 512, align 16 - %c = alloca i8, i32 48, align 16 - br i1 %flag, label %if.then, label %if.end - -if.then: - call void @llvm.aarch64.settag(i8* %a, i64 512) - call void @llvm.aarch64.settag(i8* %b, i64 512) - br label %if.end - -if.end: - call void @llvm.aarch64.settag(i8* %c, i64 48) - ret void -} - -; Two loops of size 256; the second loop updates SP. -define void @stg128_128_gap_128_128() { -entry: -; CHECK-LABEL: stg128_128_gap_128_128: -; CHECK: mov x9, sp -; CHECK: mov x8, #256 -; CHECK: st2g x9, [x9], #32 -; CHECK: sub x8, x8, #32 -; CHECK: cbnz x8, -; CHECK: mov x8, #256 -; CHECK: st2g sp, [sp], #32 -; CHECK: sub x8, x8, #32 -; CHECK: cbnz x8, -; CHECK: ret - %a = alloca i8, i32 128, align 16 - %a2 = alloca i8, i32 128, align 16 - %b = alloca i8, i32 32, align 16 - %c = alloca i8, i32 128, align 16 - %c2 = alloca i8, i32 128, align 16 - call void @use(i8* %b) - call void @llvm.aarch64.settag(i8* %a, i64 128) - call void @llvm.aarch64.settag(i8* %a2, i64 128) - call void @llvm.aarch64.settag(i8* %c, i64 128) - call void @llvm.aarch64.settag(i8* %c2, i64 128) - ret void -} diff --git a/llvm/test/CodeGen/AArch64/settag-merge.mir b/llvm/test/CodeGen/AArch64/settag-merge.mir deleted file mode 100644 index dc2a00c7d3d37..0000000000000 --- a/llvm/test/CodeGen/AArch64/settag-merge.mir +++ /dev/null @@ -1,83 +0,0 @@ -# RUN: llc -mtriple=aarch64 -mattr=+mte -run-pass=prologepilog %s -o - | FileCheck %s - ---- | - declare void @llvm.aarch64.settag(i8* nocapture writeonly, i64) argmemonly nounwind writeonly "target-features"="+mte" - define i32 @stg16_16_16_16_ret() "target-features"="+mte" { - entry: - %a = alloca i8, i32 16, align 16 - %b = alloca i8, i32 16, align 16 - %c = alloca i8, i32 16, align 16 - %d = alloca i8, i32 16, align 16 - call void @llvm.aarch64.settag(i8* %a, i64 16) - call void @llvm.aarch64.settag(i8* %b, i64 16) - call void @llvm.aarch64.settag(i8* %c, i64 16) - call void @llvm.aarch64.settag(i8* %d, i64 16) - ret i32 0 - } - - define void @stg16_store_128() "target-features"="+mte" { - entry: - %a = alloca i8, i32 16, align 16 - %b = alloca i8, i32 128, align 16 - call void @llvm.aarch64.settag(i8* %a, i64 16) - store i8 42, i8* %a - call void @llvm.aarch64.settag(i8* %b, i64 128) - ret void - } - -... ---- -# A sequence of STG with a register copy in the middle. -# Can be merged into ST2G + ST2G. -# CHECK-LABEL: name:{{.*}}stg16_16_16_16_ret -# CHECK-DAG: ST2GOffset $sp, $sp, 2 -# CHECK-DAG: ST2GOffset $sp, $sp, 0 -# CHECK-DAG: $w0 = COPY $wzr -# CHECK-DAG: RET_ReallyLR implicit killed $w0 - -name: stg16_16_16_16_ret -tracksRegLiveness: true -stack: - - { id: 0, name: a, size: 16, alignment: 16 } - - { id: 1, name: b, size: 16, alignment: 16 } - - { id: 2, name: c, size: 16, alignment: 16 } - - { id: 3, name: d, size: 16, alignment: 16 } -body: | - bb.0.entry: - STGOffset $sp, %stack.0.a, 0 :: (store 16 into %ir.a) - STGOffset $sp, %stack.1.b, 0 :: (store 16 into %ir.b) - STGOffset $sp, %stack.2.c, 0 :: (store 16 into %ir.c) - $w0 = COPY $wzr - STGOffset $sp, %stack.3.d, 0 :: (store 16 into %ir.d) - RET_ReallyLR implicit killed $w0 - -... - ---- -# A store in the middle prevents merging. 
-# CHECK-LABEL: name:{{.*}}stg16_store_128 -# CHECK: ST2GOffset $sp, $sp, 2 -# CHECK: ST2GOffset $sp, $sp, 4 -# CHECK: ST2GOffset $sp, $sp, 6 -# CHECK: STGOffset $sp, $sp, 8 -# CHECK: STRBBui -# CHECK: ST2GOffset $sp, $sp, 0 -# CHECK: RET_ReallyLR - -name: stg16_store_128 -tracksRegLiveness: true -stack: - - { id: 0, name: a, size: 16, alignment: 16 } - - { id: 1, name: b, size: 128, alignment: 16 } -body: | - bb.0.entry: - STGOffset $sp, %stack.0.a, 0 :: (store 16 into %ir.a) - renamable $w8 = MOVi32imm 42 - ST2GOffset $sp, %stack.1.b, 6 :: (store 32 into %ir.b + 96, align 16) - ST2GOffset $sp, %stack.1.b, 4 :: (store 32 into %ir.b + 64, align 16) - ST2GOffset $sp, %stack.1.b, 2 :: (store 32 into %ir.b + 32, align 16) - STRBBui killed renamable $w8, %stack.0.a, 0 :: (store 1 into %ir.a, align 16) - ST2GOffset $sp, %stack.1.b, 0 :: (store 32 into %ir.b, align 16) - RET_ReallyLR - -... diff --git a/llvm/test/CodeGen/AArch64/settag.ll b/llvm/test/CodeGen/AArch64/settag.ll index 3deeb0155fe87..9ca188fbce325 100644 --- a/llvm/test/CodeGen/AArch64/settag.ll +++ b/llvm/test/CodeGen/AArch64/settag.ll @@ -64,8 +64,8 @@ entry: define void @stg17(i8* %p) { entry: ; CHECK-LABEL: stg17: -; CHECK: stg x0, [x0], #16 ; CHECK: mov {{(w|x)}}[[R:[0-9]+]], #256 +; CHECK: stg x0, [x0], #16 ; CHECK: st2g x0, [x0], #32 ; CHECK: sub x[[R]], x[[R]], #32 ; CHECK: cbnz x[[R]], @@ -87,8 +87,8 @@ entry: define void @stzg17(i8* %p) { entry: ; CHECK-LABEL: stzg17: -; CHECK: stzg x0, [x0], #16 ; CHECK: mov {{w|x}}[[R:[0-9]+]], #256 +; CHECK: stzg x0, [x0], #16 ; CHECK: stz2g x0, [x0], #32 ; CHECK: sub x[[R]], x[[R]], #32 ; CHECK: cbnz x[[R]], @@ -110,10 +110,10 @@ entry: define void @stg_alloca5() { entry: ; CHECK-LABEL: stg_alloca5: -; CHECK: st2g sp, [sp, #32] -; CHECK-NEXT: stg sp, [sp, #64] -; CHECK-NEXT: st2g sp, [sp], #80 -; CHECK-NEXT: ret +; CHECK: stg sp, [sp, #64] +; CHECK: st2g sp, [sp, #32] +; CHECK: st2g sp, [sp] +; CHECK: ret %a = alloca i8, i32 80, align 16 call void @llvm.aarch64.settag(i8* %a, i64 80) ret void @@ -122,11 +122,12 @@ entry: define void @stg_alloca17() { entry: ; CHECK-LABEL: stg_alloca17: +; CHECK: mov [[P:x[0-9]+]], sp +; CHECK: stg [[P]], {{\[}}[[P]]{{\]}}, #16 ; CHECK: mov {{w|x}}[[R:[0-9]+]], #256 -; CHECK: st2g sp, [sp], #32 +; CHECK: st2g [[P]], {{\[}}[[P]]{{\]}}, #32 ; CHECK: sub x[[R]], x[[R]], #32 ; CHECK: cbnz x[[R]], -; CHECK: stg sp, [sp], #16 ; CHECK: ret %a = alloca i8, i32 272, align 16 call void @llvm.aarch64.settag(i8* %a, i64 272) diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-unchecked-ld-st.ll b/llvm/test/CodeGen/AArch64/stack-tagging-unchecked-ld-st.ll index ed6ccc8b49413..200837dabfe0e 100644 --- a/llvm/test/CodeGen/AArch64/stack-tagging-unchecked-ld-st.ll +++ b/llvm/test/CodeGen/AArch64/stack-tagging-unchecked-ld-st.ll @@ -210,10 +210,11 @@ entry: ; DEFAULT: ldrb [[A:w.*]], [x{{.*}}] ; DEFAULT: ldrb [[B:w.*]], [x{{.*}}] -; ALWAYS-DAG: ldg [[PA:x.*]], [x{{.*}}] -; ALWAYS-DAG: ldrb [[B:w.*]], [sp] -; ALWAYS-DAG: ldrb [[A:w.*]], {{\[}}[[PA]]{{\]}} +; ALWAYS: ldg [[PA:x.*]], [x{{.*}}] +; ALWAYS: ldrb [[B:w.*]], [sp] +; ALWAYS: ldrb [[A:w.*]], {{\[}}[[PA]]{{\]}} +; COMMON: add w0, [[B]], [[A]] ; COMMON: ret ; One of these allocas is closer to FP than to SP, and within 256 bytes diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir index 43931132107aa..b0f9cc52ae144 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir 
@@ -53,3 +53,116 @@ body: | S_ENDPGM 0, implicit %9 ... + +--- +name: add_neg_inline_const_64_to_sub_s32_s +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + + ; GFX6-LABEL: name: add_neg_inline_const_64_to_sub_s32_s + ; GFX6: liveins: $sgpr0 + ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX6: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], 64, implicit-def $scc + ; GFX6: S_ENDPGM 0, implicit [[S_SUB_I32_]] + ; GFX9-LABEL: name: add_neg_inline_const_64_to_sub_s32_s + ; GFX9: liveins: $sgpr0 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], 64, implicit-def $scc + ; GFX9: S_ENDPGM 0, implicit [[S_SUB_I32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_CONSTANT i32 -64 + %2:sgpr(s32) = G_ADD %0, %1 + S_ENDPGM 0, implicit %2 + +... + +--- +name: add_neg_inline_const_64_to_sub_s32_v +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: add_neg_inline_const_64_to_sub_s32_v + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967232, implicit $exec + ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: S_ENDPGM 0, implicit %2 + ; GFX9-LABEL: name: add_neg_inline_const_64_to_sub_s32_v + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY]], 64, 0, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_SUB_U32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 -64 + %2:vgpr(s32) = G_ADD %0, %1 + S_ENDPGM 0, implicit %2 + +... + +--- +name: add_neg_inline_const_16_to_sub_s32_s +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + + ; GFX6-LABEL: name: add_neg_inline_const_16_to_sub_s32_s + ; GFX6: liveins: $sgpr0 + ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_s + ; GFX9: liveins: $sgpr0 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_CONSTANT i32 16 + %2:sgpr(s32) = G_ADD %0, %1 + S_ENDPGM 0, implicit %2 + +... 
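The preceding tests (and the one that follows) exercise one selection rule: on AMDGPU an immediate is free only in the inline range [-16, 64], so "add x, -64", whose constant is not inline but whose negation is, is better selected as "sub x, 64", while "add x, 16" keeps its add. A plain-C++ model of that legality check; the range is the documented integer inline-immediate range, everything else is illustrative:

  #include <cassert>
  #include <cstdint>

  // AMDGPU integer inline immediates cover [-16, 64].
  static bool isInlineImm(int32_t C) { return C >= -16 && C <= 64; }

  // Fold "add x, C" into "sub x, -C" only when that turns a non-inline
  // constant into an inline one.
  static bool shouldFoldAddToSub(int32_t C, int32_t &SubImm) {
    if (!isInlineImm(C) && isInlineImm(-C)) {
      SubImm = -C;
      return true;
    }
    return false;
  }

  int main() {
    int32_t Imm = 0;
    assert(shouldFoldAddToSub(-64, Imm) && Imm == 64); // S_SUB_I32 ..., 64
    assert(!shouldFoldAddToSub(16, Imm)); // 16 is already inline: keep add
    return 0;
  }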
+ +--- +name: add_neg_inline_const_16_to_sub_s32_v +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: add_neg_inline_const_16_to_sub_s32_v + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec + ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: S_ENDPGM 0, implicit %2 + ; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_v + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 16 + %2:vgpr(s32) = G_ADD %0, %1 + S_ENDPGM 0, implicit %2 + +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir new file mode 100644 index 0000000000000..cc48e9126c9b7 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir @@ -0,0 +1,132 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s + +# Note: 16-bit instructions generally produce a 0 result in the high 16-bits on GFX8 and GFX9 and preserve high 16 bits on GFX10+ + +--- +name: add_s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: add_s16 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]] + ; GFX10-LABEL: name: add_s16 + ; GFX10: liveins: $vgpr0, $vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_ADD %2, %3 + S_ENDPGM 0, implicit %4 + +... 
+ +--- +name: add_s16_zext_to_s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: add_s16_zext_to_s32 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]] + ; GFX10-LABEL: name: add_s16_zext_to_s32 + ; GFX10: liveins: $vgpr0, $vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX10: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_ADD_U16_e64_]], 0, 16, implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_BFE_U32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_ADD %2, %3 + %5:vgpr(s32) = G_ZEXT %4 + S_ENDPGM 0, implicit %5 + +... + +--- +name: add_s16_neg_inline_const_64 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: add_s16_neg_inline_const_64 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]] + ; GFX10-LABEL: name: add_s16_neg_inline_const_64 + ; GFX10: liveins: $vgpr0 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s16) = G_TRUNC %0 + %2:vgpr(s16) = G_CONSTANT i16 -64 + %3:vgpr(s16) = G_ADD %1, %2 + S_ENDPGM 0, implicit %3 + +... + +--- +name: add_s16_neg_inline_const_64_zext_to_s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: add_s16_neg_inline_const_64_zext_to_s32 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]] + ; GFX10-LABEL: name: add_s16_neg_inline_const_64_zext_to_s32 + ; GFX10: liveins: $vgpr0 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, implicit $exec + ; GFX10: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_SUB_U16_e64_]], 0, 16, implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_BFE_U32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s16) = G_TRUNC %0 + %2:vgpr(s16) = G_CONSTANT i16 -64 + %3:vgpr(s16) = G_ADD %1, %2 + %4:vgpr(s32) = G_ZEXT %3 + S_ENDPGM 0, implicit %4 + +... 
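The GFX10 checks above keep a V_BFE_U32 after the 16-bit op, while the ashr/lshr hunks that follow delete it for GFX8/GFX9; both fall out of the note at the top of this test file. A one-line predicate capturing the rule, with generation numbers as illustrative shorthand for the real subtarget query:

  // On GFX8/GFX9 a 16-bit VALU result is already zero-extended into the
  // 32-bit destination register, so a following zext selects to nothing.
  // GFX10+ preserves the high 16 bits instead, so an explicit
  // V_BFE_U32 dst, src, 0, 16 is still required.
  static bool zextOfOp16IsFree(unsigned Gen) { return Gen == 8 || Gen == 9; }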
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir index b3402a6051488..8d3d677b3c007 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir @@ -168,14 +168,12 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX8: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_ASHRREV_I16_e64_]], 0, 16, implicit $exec - ; GFX8: S_ENDPGM 0, implicit [[V_BFE_U32_]] + ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX9-LABEL: name: ashr_s16_s16_vv_zext_to_s32 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX9: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_ASHRREV_I16_e64_]], 0, 16, implicit $exec - ; GFX9: S_ENDPGM 0, implicit [[V_BFE_U32_]] + ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] ; GFX10-LABEL: name: ashr_s16_s16_vv_zext_to_s32 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir index 866ab39fe2d19..f28e35669357b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir @@ -168,14 +168,12 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX8: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_LSHRREV_B16_e64_]], 0, 16, implicit $exec - ; GFX8: S_ENDPGM 0, implicit [[V_BFE_U32_]] + ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX9-LABEL: name: lshr_s16_s16_vv_zext_to_s32 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX9: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_LSHRREV_B16_e64_]], 0, 16, implicit $exec - ; GFX9: S_ENDPGM 0, implicit [[V_BFE_U32_]] + ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] ; GFX10-LABEL: name: lshr_s16_s16_vv_zext_to_s32 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir new file mode 100644 index 0000000000000..b09abd4be1ad7 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir @@ -0,0 +1,140 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX6 %s + +--- +name: smed3_s32_vvv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: smed3_s32_vvv + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_MED3_I32_:%[0-9]+]]:vgpr_32 = V_MED3_I32 [[COPY]], [[COPY1]], [[COPY2]], implicit 
$exec + ; GFX6: S_ENDPGM 0, implicit [[V_MED3_I32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_SMAX %0, %1 + %4:vgpr(s32) = G_SMIN %0, %1 + %5:vgpr(s32) = G_SMAX %4, %2 + %6:vgpr(s32) = G_SMIN %3, %5 + S_ENDPGM 0, implicit %6 +... + +--- + +name: smed3_s32_sss +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2 + + ; GFX6-LABEL: name: smed3_s32_sss + ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6: [[S_MAX_I32_:%[0-9]+]]:sreg_32 = S_MAX_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: [[S_MIN_I32_:%[0-9]+]]:sreg_32 = S_MIN_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: [[S_MAX_I32_1:%[0-9]+]]:sreg_32 = S_MAX_I32 [[S_MIN_I32_]], [[COPY2]], implicit-def $scc + ; GFX6: [[S_MIN_I32_1:%[0-9]+]]:sreg_32 = S_MIN_I32 [[S_MAX_I32_]], [[S_MAX_I32_1]], implicit-def $scc + ; GFX6: S_ENDPGM 0, implicit [[S_MIN_I32_1]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s32) = COPY $sgpr2 + %3:sgpr(s32) = G_SMAX %0, %1 + %4:sgpr(s32) = G_SMIN %0, %1 + %5:sgpr(s32) = G_SMAX %4, %2 + %6:sgpr(s32) = G_SMIN %3, %5 + S_ENDPGM 0, implicit %6 +... + +--- +name: smed3_s32_vvv_multiuse0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: smed3_s32_vvv_multiuse0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MAX_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_]], [[COPY2]], implicit $exec + ; GFX6: [[V_MIN_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[V_MAX_I32_e64_]], [[V_MAX_I32_e64_1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_MIN_I32_e64_1]], implicit [[V_MAX_I32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_SMAX %0, %1 + %4:vgpr(s32) = G_SMIN %0, %1 + %5:vgpr(s32) = G_SMAX %4, %2 + %6:vgpr(s32) = G_SMIN %3, %5 + S_ENDPGM 0, implicit %6, implicit %3 +... + +--- +name: smed3_s32_vvv_multiuse1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: smed3_s32_vvv_multiuse1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MAX_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_]], [[COPY2]], implicit $exec + ; GFX6: [[V_MIN_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[V_MAX_I32_e64_]], [[V_MAX_I32_e64_1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_MIN_I32_e64_1]], implicit [[V_MIN_I32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_SMAX %0, %1 + %4:vgpr(s32) = G_SMIN %0, %1 + %5:vgpr(s32) = G_SMAX %4, %2 + %6:vgpr(s32) = G_SMIN %3, %5 + S_ENDPGM 0, implicit %6, implicit %4 +... 
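
[reviewer note] Every smed3 pattern in this file matches the same min/max decomposition of a three-input median, med3(a, b, c) = min(max(a, b), max(min(a, b), c)); the umed3 files further down repeat the structure with unsigned min/max. A small C++ sanity check of the identity (a sketch, not selector code):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int32_t med3(int32_t a, int32_t b, int32_t c) {
      return std::min(std::max(a, b), std::max(std::min(a, b), c));
    }

    int main() {
      assert(med3(3, 1, 2) == 2);  // distinct values: picks the middle one
      assert(med3(-5, 7, 0) == 0);
      assert(med3(4, 4, 1) == 4);  // ties resolve to the repeated value
    }

Note that the sss variant keeps the scalar S_MIN/S_MAX sequence: V_MED3_I32 is a VALU instruction and there is no scalar med3, so the fold only applies on the vector register bank.
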
+ +--- +name: smed3_s32_vvv_multiuse2 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: smed3_s32_vvv_multiuse2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MAX_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_]], [[COPY2]], implicit $exec + ; GFX6: [[V_MIN_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[V_MAX_I32_e64_]], [[V_MAX_I32_e64_1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_MIN_I32_e64_1]], implicit [[V_MAX_I32_e64_1]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_SMAX %0, %1 + %4:vgpr(s32) = G_SMIN %0, %1 + %5:vgpr(s32) = G_SMAX %4, %2 + %6:vgpr(s32) = G_SMIN %3, %5 + S_ENDPGM 0, implicit %6, implicit %5 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir new file mode 100644 index 0000000000000..9e029ee5e066c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir @@ -0,0 +1,168 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s + +--- +name: smed3_s16_vvv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX8-LABEL: name: smed3_s16_vvv + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX8: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]] + ; GFX9-LABEL: name: smed3_s16_vvv + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_MED3_I16_:%[0-9]+]]:vgpr_32 = V_MED3_I16 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MED3_I16_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + + %6:vgpr(s16) = G_SMAX %3, %4 + %7:vgpr(s16) = G_SMIN %3, %4 + %8:vgpr(s16) = G_SMAX %7, %5 + %9:vgpr(s16) = G_SMIN %6, %8 + + S_ENDPGM 0, implicit %9 +... 
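
[reviewer note] The multiuse variants (here and in the s16 and umed3 files) pin down the other side of the fold: when an intermediate min/max result has a second user, that instruction has to stay, so rewriting the final min as med3 would add an instruction rather than replace three. A hypothetical shape of the guard, illustrative rather than the actual selector code:

    #include <cassert>

    // Hypothetical node type standing in for a machine SSA value.
    struct Node {
      int NumUses;
      bool hasOneUse() const { return NumUses == 1; }
    };

    // min(max(a,b), max(min(a,b),c)) -> med3(a,b,c) is only profitable if
    // all three intermediate results die at the final min.
    bool canFoldToMed3(const Node &Max0, const Node &Min0, const Node &Max1) {
      return Max0.hasOneUse() && Min0.hasOneUse() && Max1.hasOneUse();
    }

    int main() {
      Node Max0{2}, Min0{1}, Max1{1};           // Max0 has an extra user
      assert(!canFoldToMed3(Max0, Min0, Max1)); // matches multiuse0: no med3
    }
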
+ +--- +name: smed3_s16_vvv_multiuse0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX8-LABEL: name: smed3_s16_vvv_multiuse0 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX8: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MAX_I16_e64_]] + ; GFX9-LABEL: name: smed3_s16_vvv_multiuse0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX9: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MAX_I16_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + + %6:vgpr(s16) = G_SMAX %3, %4 + %7:vgpr(s16) = G_SMIN %3, %4 + %8:vgpr(s16) = G_SMAX %7, %5 + %9:vgpr(s16) = G_SMIN %6, %8 + + S_ENDPGM 0, implicit %9, implicit %6 +... 
+ +--- +name: smed3_s16_vvv_multiuse1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX8-LABEL: name: smed3_s16_vvv_multiuse1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX8: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MIN_I16_e64_]] + ; GFX9-LABEL: name: smed3_s16_vvv_multiuse1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX9: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MIN_I16_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + + %6:vgpr(s16) = G_SMAX %3, %4 + %7:vgpr(s16) = G_SMIN %3, %4 + %8:vgpr(s16) = G_SMAX %7, %5 + %9:vgpr(s16) = G_SMIN %6, %8 + + S_ENDPGM 0, implicit %9, implicit %7 +... 
+ +--- +name: smed3_s16_vvv_multiuse2 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX8-LABEL: name: smed3_s16_vvv_multiuse2 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX8: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MAX_I16_e64_1]] + ; GFX9-LABEL: name: smed3_s16_vvv_multiuse2 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec + ; GFX9: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MAX_I16_e64_1]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + + %6:vgpr(s16) = G_SMAX %3, %4 + %7:vgpr(s16) = G_SMIN %3, %4 + %8:vgpr(s16) = G_SMAX %7, %5 + %9:vgpr(s16) = G_SMIN %6, %8 + + S_ENDPGM 0, implicit %9, implicit %8 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir new file mode 100644 index 0000000000000..a8341251faf64 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.mir @@ -0,0 +1,140 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX6 %s + +--- +name: umed3_s32_vvv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: umed3_s32_vvv + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_MED3_U32_:%[0-9]+]]:vgpr_32 = V_MED3_U32 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_MED3_U32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_UMAX %0, %1 + %4:vgpr(s32) = G_UMIN %0, %1 + %5:vgpr(s32) = G_UMAX %4, %2 + %6:vgpr(s32) = G_UMIN %3, %5 + S_ENDPGM 0, implicit %6 +... 
+ +--- + +name: umed3_s32_sss +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2 + + ; GFX6-LABEL: name: umed3_s32_sss + ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6: [[S_MAX_U32_:%[0-9]+]]:sreg_32 = S_MAX_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: [[S_MIN_U32_:%[0-9]+]]:sreg_32 = S_MIN_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: [[S_MAX_U32_1:%[0-9]+]]:sreg_32 = S_MAX_U32 [[S_MIN_U32_]], [[COPY2]], implicit-def $scc + ; GFX6: [[S_MIN_U32_1:%[0-9]+]]:sreg_32 = S_MIN_U32 [[S_MAX_U32_]], [[S_MAX_U32_1]], implicit-def $scc + ; GFX6: S_ENDPGM 0, implicit [[S_MIN_U32_1]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s32) = COPY $sgpr2 + %3:sgpr(s32) = G_UMAX %0, %1 + %4:sgpr(s32) = G_UMIN %0, %1 + %5:sgpr(s32) = G_UMAX %4, %2 + %6:sgpr(s32) = G_UMIN %3, %5 + S_ENDPGM 0, implicit %6 +... + +--- +name: umed3_s32_vvv_multiuse0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: umed3_s32_vvv_multiuse0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MAX_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[V_MIN_U32_e64_]], [[COPY2]], implicit $exec + ; GFX6: [[V_MIN_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_MAX_U32_e64_]], [[V_MAX_U32_e64_1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_MIN_U32_e64_1]], implicit [[V_MAX_U32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_UMAX %0, %1 + %4:vgpr(s32) = G_UMIN %0, %1 + %5:vgpr(s32) = G_UMAX %4, %2 + %6:vgpr(s32) = G_UMIN %3, %5 + S_ENDPGM 0, implicit %6, implicit %3 +... + +--- +name: umed3_s32_vvv_multiuse1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: umed3_s32_vvv_multiuse1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MAX_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[V_MIN_U32_e64_]], [[COPY2]], implicit $exec + ; GFX6: [[V_MIN_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_MAX_U32_e64_]], [[V_MAX_U32_e64_1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_MIN_U32_e64_1]], implicit [[V_MIN_U32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_UMAX %0, %1 + %4:vgpr(s32) = G_UMIN %0, %1 + %5:vgpr(s32) = G_UMAX %4, %2 + %6:vgpr(s32) = G_UMIN %3, %5 + S_ENDPGM 0, implicit %6, implicit %4 +... 
+ +--- +name: umed3_s32_vvv_multiuse2 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX6-LABEL: name: umed3_s32_vvv_multiuse2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_MAX_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MIN_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX6: [[V_MAX_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 [[V_MIN_U32_e64_]], [[COPY2]], implicit $exec + ; GFX6: [[V_MIN_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U32_e64 [[V_MAX_U32_e64_]], [[V_MAX_U32_e64_1]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_MIN_U32_e64_1]], implicit [[V_MAX_U32_e64_1]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_UMAX %0, %1 + %4:vgpr(s32) = G_UMIN %0, %1 + %5:vgpr(s32) = G_UMAX %4, %2 + %6:vgpr(s32) = G_UMIN %3, %5 + S_ENDPGM 0, implicit %6, implicit %5 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir new file mode 100644 index 0000000000000..c323883ff6139 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir @@ -0,0 +1,168 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s + +--- +name: umed3_s16_vvv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX8-LABEL: name: umed3_s16_vvv + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX8: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]] + ; GFX9-LABEL: name: umed3_s16_vvv + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_MED3_U16_:%[0-9]+]]:vgpr_32 = V_MED3_U16 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MED3_U16_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + + %6:vgpr(s16) = G_UMAX %3, %4 + %7:vgpr(s16) = G_UMIN %3, %4 + %8:vgpr(s16) = G_UMAX %7, %5 + %9:vgpr(s16) = G_UMIN %6, %8 + + S_ENDPGM 0, implicit %9 +... 
+ +--- +name: umed3_s16_vvv_multiuse0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX8-LABEL: name: umed3_s16_vvv_multiuse0 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX8: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MAX_U16_e64_]] + ; GFX9-LABEL: name: umed3_s16_vvv_multiuse0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX9: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MAX_U16_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + + %6:vgpr(s16) = G_UMAX %3, %4 + %7:vgpr(s16) = G_UMIN %3, %4 + %8:vgpr(s16) = G_UMAX %7, %5 + %9:vgpr(s16) = G_UMIN %6, %8 + + S_ENDPGM 0, implicit %9, implicit %6 +... 
+ +--- +name: umed3_s16_vvv_multiuse1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX8-LABEL: name: umed3_s16_vvv_multiuse1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX8: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MIN_U16_e64_]] + ; GFX9-LABEL: name: umed3_s16_vvv_multiuse1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX9: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MIN_U16_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + + %6:vgpr(s16) = G_UMAX %3, %4 + %7:vgpr(s16) = G_UMIN %3, %4 + %8:vgpr(s16) = G_UMAX %7, %5 + %9:vgpr(s16) = G_UMIN %6, %8 + + S_ENDPGM 0, implicit %9, implicit %7 +... 
+ +--- +name: umed3_s16_vvv_multiuse2 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX8-LABEL: name: umed3_s16_vvv_multiuse2 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX8: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX8: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MAX_U16_e64_1]] + ; GFX9-LABEL: name: umed3_s16_vvv_multiuse2 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec + ; GFX9: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MAX_U16_e64_1]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + + %6:vgpr(s16) = G_UMAX %3, %4 + %7:vgpr(s16) = G_UMIN %3, %4 + %8:vgpr(s16) = G_UMAX %7, %5 + %9:vgpr(s16) = G_UMIN %6, %8 + + S_ENDPGM 0, implicit %9, implicit %8 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir index 4321e4e7ca214..3085bb7201513 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir @@ -168,14 +168,12 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX8: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_LSHLREV_B16_e64_]], 0, 16, implicit $exec - ; GFX8: S_ENDPGM 0, implicit [[V_BFE_U32_]] + ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX9-LABEL: name: shl_s16_s16_vv_zext_to_s32 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec - ; GFX9: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_LSHLREV_B16_e64_]], 0, 16, implicit $exec - ; GFX9: S_ENDPGM 0, implicit [[V_BFE_U32_]] + ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] ; GFX10-LABEL: name: shl_s16_s16_vv_zext_to_s32 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll index dd4f892ebc231..ce71a89adacb7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll @@ -14,7 +14,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16 ; PACKED: bb.1 (%ir-block.0): @@ -27,7 +27,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret 
void @@ -44,7 +44,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7 + 4095, align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7" + 4095, align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16 ; PACKED: bb.1 (%ir-block.0): @@ -56,7 +56,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7 + 4095, align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7" + 4095, align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -78,7 +78,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16 ; PACKED: bb.1 (%ir-block.0): @@ -91,7 +91,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], 
[[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -116,7 +116,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16 ; PACKED: bb.1 (%ir-block.0): @@ -131,7 +131,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -173,7 +173,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec ; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4) ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def 
$scc, implicit $exec ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -211,7 +211,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec ; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4) ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -240,7 +240,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095 ; PACKED: bb.1 (%ir-block.0): @@ -253,7 +253,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) ret void @@ -275,7 +275,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: 
[[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096 ; PACKED: bb.1 (%ir-block.0): @@ -288,7 +288,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) ret void @@ -310,7 +310,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 16, align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 16, align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16 ; PACKED: bb.1 (%ir-block.0): @@ -323,7 +323,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 16, align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], 
[[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 16, align 1, addrspace 4) ; PACKED: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -346,7 +346,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4095, align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095 ; PACKED: bb.1 (%ir-block.0): @@ -359,7 +359,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4095, align 1, addrspace 4) ; PACKED: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -384,7 +384,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %23, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4096, align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %23, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4096, align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096 ; PACKED: bb.1 (%ir-block.0): @@ -399,7 +399,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], 
%subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; PACKED: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
 ; PACKED: %14:vgpr_32, dead %15:sreg_64 = V_ADD_I32_e64 [[COPY5]], killed [[V_MOV_B32_e32_]], 0, implicit $exec
- ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4096, align 1, addrspace 4)
+ ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
 ; PACKED: S_ENDPGM 0
 %voffset.add = add i32 %voffset, 4096
 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -445,7 +445,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
 ; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
 ; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
 ; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %48, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7 + 4096, align 1, addrspace 4)
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %48, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
 ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -485,7 +485,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
 ; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
 ; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
 ; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %32, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7 + 4096, align 1, addrspace 4)
+ ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %32, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
 ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
index 75d25b0c2c469..aea37fd08b408 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
@@ -14,7 +14,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
 ret void
@@ -31,7 +31,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
 ret void
@@ -51,7 +51,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
 ret void
@@ -72,7 +72,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
- ; CHECK: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
 ret void
@@ -94,7 +94,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
 ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
- ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
 ret void
@@ -132,7 +132,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
 ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -159,7 +159,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
- ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
 ret void
@@ -179,7 +179,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
 ret void
@@ -199,7 +199,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7 + 16, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7" + 16, align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 %voffset.add = add i32 %voffset, 16
 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -220,7 +220,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7 + 4095, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 %voffset.add = add i32 %voffset, 4095
 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -243,7 +243,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
 ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
 ; CHECK: %16:vgpr_32, dead %17:sreg_64 = V_ADD_I32_e64 [[COPY6]], killed [[V_MOV_B32_e32_]], 0, implicit $exec
- ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %16, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7 + 4096, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %16, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 %voffset.add = add i32 %voffset, 4096
 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -286,7 +286,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
 ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %34, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 4096, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %34, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
index 4db5fe081fda2..c5aa36df8675d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
@@ -15,7 +15,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
 ret void
@@ -36,7 +36,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
 ret void
@@ -71,7 +71,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
 ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -103,7 +103,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
 ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
 ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -148,7 +148,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
 ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
 ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
 ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -173,7 +173,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1)
 ret void
@@ -191,7 +191,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
 ret void
@@ -209,7 +209,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 3)
 ret void
@@ -227,7 +227,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4)
 ret void
@@ -245,7 +245,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6)
 ret void
@@ -263,7 +263,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5)
 ret void
@@ -281,7 +281,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7)
 ret void
@@ -301,7 +301,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
 ret void
@@ -322,7 +322,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
- ; CHECK: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
 ret void
@@ -344,7 +344,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
 ret void
@@ -362,7 +362,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom TargetCustom7, addrspace 4)
+ ; CHECK: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "TargetCustom7", addrspace 4)
 ; CHECK: S_ENDPGM 0
 %val.trunc = trunc i32 %val to i8
 call void @llvm.amdgcn.raw.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -381,7 +381,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 %val.trunc = trunc i32 %val to i16
 call void @llvm.amdgcn.raw.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -400,7 +400,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
 ret void
@@ -418,7 +418,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
 ret void
@@ -438,7 +438,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
 ret void
@@ -474,7 +474,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
 ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -498,7 +498,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
 ret void
@@ -516,7 +516,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v
 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4096, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0)
 ret void
@@ -534,7 +534,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 16, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 16, align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 %voffset.add = add i32 %voffset, 16
 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -553,7 +553,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 %voffset.add = add i32 %voffset, 4095
 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -574,7 +574,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
 ; CHECK: %14:vgpr_32, dead %15:sreg_64 = V_ADD_I32_e64 [[COPY5]], killed [[V_MOV_B32_e32_]], 0, implicit $exec
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4096, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 %voffset.add = add i32 %voffset, 4096
 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -593,7 +593,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
 ret void
@@ -611,7 +611,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
 ret void
@@ -629,7 +629,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 16, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 16, align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 %voffset.add = add i32 %voffset, 16
 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -648,7 +648,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4095, align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 %voffset.add = add i32 %voffset, 4095
 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -669,7 +669,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
 ; CHECK: %14:vgpr_32, dead %15:sreg_64 = V_ADD_I32_e64 [[COPY5]], killed [[V_MOV_B32_e32_]], 0, implicit $exec
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4096, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 4096, align 1, addrspace 4)
 ; CHECK: S_ENDPGM 0
 %voffset.add = add i32 %voffset, 4096
 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@@ -707,7 +707,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
 ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %30, [[REG_SEQUENCE3]], [[COPY6]], 904, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 5000, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %30, [[REG_SEQUENCE3]], [[COPY6]], 904, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 5000, align 1, addrspace 4)
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -750,7 +750,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr
 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
 ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 5000, align 1, addrspace 4)
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "TargetCustom7" + 5000, align 1, addrspace 4)
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
index 44a17012237ec..d4a3f4025b378 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
@@ -18,7 +18,7 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32
 ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
 ; CHECK: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
 ; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
 ; CHECK: S_ENDPGM 0
@@ -44,7 +44,7 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
 ; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
 ; CHECK: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
 ; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
 ; CHECK: S_ENDPGM 0
@@ -98,7 +98,7 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) {
 ; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
 ; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -160,7 +160,7 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg
 ; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
 ; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
index c59372a8d09c7..e5d67a3f88742 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
@@ -23,7 +23,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
 ; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
 ; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
 ; CHECK: S_ENDPGM 0
@@ -54,7 +54,7 @@ define amdgpu_ps void @sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre
 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
 ; CHECK: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
 ; CHECK: [[COPY14:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
 ; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY14]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
 ; CHECK: S_ENDPGM 0
@@ -113,7 +113,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr
 ; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
 ; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
 ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -169,7 +169,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre
 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
 ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -247,7 +247,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr
 ; CHECK: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec
 ; CHECK: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc
 ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
index 33a8e9a1284cc..4443daba2ee2d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
@@ -14,7 +14,7 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: $vgpr0 = COPY [[INT]](s32)
 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -34,7 +34,7 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr
 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: $vgpr0 = COPY [[INT]](s32)
 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -72,7 +72,7 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -108,7 +108,7 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
 ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2
 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
 ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY5]](s32), implicit $exec
- ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -156,7 +156,7 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
 ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
 ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]](s32), implicit $exec
 ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
index 9bc81aecc8a1d..a657488278b04 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
@@ -15,7 +15,7 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex__vg
 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
 ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: $vgpr0 = COPY [[INT]](s32)
 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@@ -37,7 +37,7 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__sgpr_val__sgpr_vindex__sg
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
 ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
 ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: $vgpr0 = COPY [[INT]](s32)
 ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@@ -76,7 +76,7 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
 ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
 ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -113,7 +113,7 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgp
 ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
 ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY6]](s32), implicit $exec
- ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -162,7 +162,7 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
 ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
 ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]](s32), implicit $exec
 ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
 ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
 ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
index efe81eabc3497..a6ba559382f5c 100644
---
a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll @@ -16,7 +16,7 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -39,7 +39,7 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__sgpr_val__sgpr_vindex__sg ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) ; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) ; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[COPY6]](s32) - ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY8]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY9]](s32), [[COPY10]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY8]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY9]](s32), [[COPY10]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -76,7 +76,7 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -111,7 +111,7 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK: 
[[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.2 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY7]](s32), implicit $exec - ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -158,7 +158,7 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]](s32), implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4) + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable store 4 into custom "TargetCustom7", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll index f96a13878ba6a..ce62e041aa67e 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll @@ -10,218 +10,218 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1 ; GCN: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM killed [[REG_SEQUENCE]], 0, 0, 0 :: (dereferenceable invariant load 16 from %ir.arg0, addrspace 6) - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 16, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 
implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 16, align 1, addrspace 4) ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 - ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 32, align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 32, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 
0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 48, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 48, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 64, align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 64, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], 
[[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 - ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 80, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 80, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 ; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY]], %subreg.sub1 ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 96, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 96, align 1, addrspace 4) ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 ; GCN: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF1]].sub0 ; GCN: 
[[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[DEF2]].sub0 ; GCN: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF3]].sub0 ; GCN: INLINEASM &"", 1 ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec - ; GCN: BUFFER_ATOMIC_ADD_F32_OFFSET [[V_MOV_B32_e32_1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (load store 4 on custom TargetCustom7 + 112, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_F32_OFFEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (load store 4 on custom TargetCustom7, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (load store 4 on custom TargetCustom7, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (load store 4 on custom TargetCustom7, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_F32_OFFSET [[V_MOV_B32_e32_1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (load store 4 on custom "TargetCustom7" + 112, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_F32_OFFEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (load store 4 on custom "TargetCustom7", addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (load store 4 on custom "TargetCustom7", addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (load store 4 on custom "TargetCustom7", addrspace 4) ; GCN: INLINEASM &"", 1 - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 128, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 128, align 1, addrspace 4) ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 64 - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 128, align 1, addrspace 4) + ; GCN: 
[[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 128, align 1, addrspace 4) ; GCN: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 128 - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 128, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 128, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY6]], 128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY6]], 128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 144, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 144, align 1, addrspace 4) ; GCN: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 72 - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 144, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 144, align 1, addrspace 4) ; GCN: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 144 - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 144, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = 
BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 144, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY7]], 144, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY7]], 144, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 - ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 160, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 160, align 1, addrspace 4) ; GCN: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 80 - ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_5]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 160, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_5]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 160, align 1, addrspace 4) ; GCN: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 160 - ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 160, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 160, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[COPY8]], 160, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[COPY8]], 160, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 ; GCN: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 176, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], 
[[S_MOV_B32_]], 176, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 176, align 1, addrspace 4) ; GCN: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[DEF4]].sub0 ; GCN: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 88 ; GCN: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_7]], 88, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 176, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_7]], 88, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 176, align 1, addrspace 4) ; GCN: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[DEF5]].sub0 ; GCN: [[S_MOV_B32_8:%[0-9]+]]:sreg_32 = S_MOV_B32 176 ; GCN: [[DEF6:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 176, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 176, align 1, addrspace 4) ; GCN: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[DEF6]].sub0 ; GCN: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[DEF7]].sub0 ; GCN: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY13]], 176, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY13]], 176, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[DEF8]].sub0 ; GCN: INLINEASM &"", 1 - ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 192, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 192, align 1, addrspace 4) ; GCN: [[S_MOV_B32_9:%[0-9]+]]:sreg_32 = S_MOV_B32 96 - ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 192, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 192, align 1, addrspace 4) ; GCN: 
[[S_MOV_B32_10:%[0-9]+]]:sreg_32 = S_MOV_B32 192 - ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 192, align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 192, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY15]], 192, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY15]], 192, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 208, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 208, align 1, addrspace 4) ; GCN: [[S_MOV_B32_11:%[0-9]+]]:sreg_32 = S_MOV_B32 104 - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 208, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 208, align 1, addrspace 4) ; GCN: [[S_MOV_B32_12:%[0-9]+]]:sreg_32 = S_MOV_B32 208 - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 208, align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 208, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], 
[[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY16]], 208, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY16]], 208, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 ; GCN: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY17]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 224, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY17]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 224, align 1, addrspace 4) ; GCN: [[S_MOV_B32_13:%[0-9]+]]:sreg_32 = S_MOV_B32 112 ; GCN: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY18]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 224, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY18]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 224, align 1, addrspace 4) ; GCN: [[S_MOV_B32_14:%[0-9]+]]:sreg_32 = S_MOV_B32 224 ; GCN: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY19]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 224, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY19]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 224, align 1, addrspace 4) ; GCN: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GCN: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY21:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY20]], [[S_LOAD_DWORDX4_IMM]], [[COPY21]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) - ; GCN: 
[[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY20]], [[S_LOAD_DWORDX4_IMM]], [[COPY21]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 ; GCN: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY22]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 240, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY22]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 240, align 1, addrspace 4) ; GCN: [[S_MOV_B32_15:%[0-9]+]]:sreg_32 = S_MOV_B32 120 ; GCN: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY23]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 240, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY23]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 240, align 1, addrspace 4) ; GCN: [[S_MOV_B32_16:%[0-9]+]]:sreg_32 = S_MOV_B32 240 ; GCN: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY24]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7 + 240, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY24]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7" + 240, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], 
[[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY26:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY25]], [[S_LOAD_DWORDX4_IMM]], [[COPY26]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY25]], [[S_LOAD_DWORDX4_IMM]], [[COPY26]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 ; GCN: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY27]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 256, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY27]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 256, align 1, addrspace 4) ; GCN: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 128, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 256, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 128, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 256, align 1, addrspace 4) ; GCN: [[S_MOV_B32_17:%[0-9]+]]:sreg_32 = S_MOV_B32 256 ; GCN: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 256, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_BOTHEN [[COPY]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 256, align 1, 
addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_BOTHEN [[COPY]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY31:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY30]], [[S_LOAD_DWORDX4_IMM]], [[COPY31]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY30]], [[S_LOAD_DWORDX4_IMM]], [[COPY31]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 ; GCN: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[DEF9:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY32]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 272, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY32]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 272, align 1, addrspace 4) ; GCN: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[DEF9]].sub0 ; GCN: [[S_MOV_B32_18:%[0-9]+]]:sreg_32 = S_MOV_B32 136 ; GCN: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[DEF10:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY34]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 272, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY34]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 272, align 1, addrspace 4) ; GCN: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[DEF10]].sub0 ; GCN: [[S_MOV_B32_19:%[0-9]+]]:sreg_32 = S_MOV_B32 272 ; GCN: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[DEF11:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY36]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7 + 272, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY36]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7" + 272, align 1, addrspace 4) ; GCN: 
[[COPY37:%[0-9]+]]:vgpr_32 = COPY [[DEF11]].sub0 ; GCN: [[DEF12:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY38:%[0-9]+]]:vgpr_32 = COPY [[DEF12]].sub0 ; GCN: [[COPY39:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY40:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN: [[DEF13:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY39]], [[S_LOAD_DWORDX4_IMM]], [[COPY40]], 272, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY39]], [[S_LOAD_DWORDX4_IMM]], [[COPY40]], 272, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY41:%[0-9]+]]:vgpr_32 = COPY [[DEF13]].sub0 ; GCN: [[DEF14:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY42:%[0-9]+]]:vgpr_32 = COPY [[DEF14]].sub0 ; GCN: [[DEF15:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY43:%[0-9]+]]:vgpr_32 = COPY [[DEF15]].sub0 ; GCN: INLINEASM &"", 1 ; GCN: [[COPY44:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY44]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 288, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY44]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 288, align 1, addrspace 4) ; GCN: [[COPY45:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[COPY45]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 288, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[COPY45]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 288, align 1, addrspace 4) ; GCN: [[S_MOV_B32_20:%[0-9]+]]:sreg_32 = 
S_MOV_B32 288 ; GCN: [[COPY46:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY46]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 288, align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY46]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 288, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY47:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY48:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY47]], [[S_LOAD_DWORDX4_IMM]], [[COPY48]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY47]], [[S_LOAD_DWORDX4_IMM]], [[COPY48]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 ; GCN: [[COPY49:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY49]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 304, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY49]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 304, align 1, addrspace 4) ; GCN: [[S_MOV_B32_21:%[0-9]+]]:sreg_32 = S_MOV_B32 152 ; GCN: [[COPY50:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[COPY50]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, 
0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 304, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[COPY50]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 304, align 1, addrspace 4) ; GCN: [[S_MOV_B32_22:%[0-9]+]]:sreg_32 = S_MOV_B32 304 ; GCN: [[COPY51:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY51]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7 + 304, align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY51]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7" + 304, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY52:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY53:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY52]], [[S_LOAD_DWORDX4_IMM]], [[COPY53]], 304, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY52]], [[S_LOAD_DWORDX4_IMM]], [[COPY53]], 304, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "TargetCustom7", align 1, addrspace 4) ; GCN: S_ENDPGM 0 bb.0: %tmp0 = load <4 x i32>, <4 x i32> addrspace(6)* %arg0, align 16, !invariant.load !0 diff --git a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll index 0c264251942a4..93322c7da4f86 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll +++ 
b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll @@ -12,7 +12,7 @@ define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg %arg) { ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]] ; GCN: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "TargetCustom7", align 1, addrspace 4) ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 @@ -21,7 +21,7 @@ define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg %arg) { ; GCN: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF2]] ; GCN: [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GCN: BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "TargetCustom7", align 1, addrspace 4) ; GCN: S_ENDPGM 0 main_body: %tmp25 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> undef, i32 undef, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll index cdcf7383afc09..068e3d98f17be 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll @@ -27,7 +27,7 @@ ; MIR-LABEL: name: gws_barrier_offset0{{$}} ; MIR: BUNDLE implicit{{( killed)?( renamable)?}} $vgpr0, implicit $m0, implicit $exec { -; MIR-NEXT: DS_GWS_BARRIER renamable $vgpr0, 0, -1, implicit $m0, implicit $exec :: (load 4 from custom GWSResource) +; MIR-NEXT: DS_GWS_BARRIER renamable $vgpr0, 0, -1, implicit $m0, implicit $exec :: (load 4 from custom "GWSResource") ; MIR-NEXT: S_WAITCNT 0 ; MIR-NEXT: } define amdgpu_kernel void @gws_barrier_offset0(i32 %val) #0 { diff --git a/llvm/test/CodeGen/ARM/softfp-constant-comparison.ll b/llvm/test/CodeGen/ARM/softfp-constant-comparison.ll index f70e9f378f3b0..e7e4d2bf26f2b 100644 --- a/llvm/test/CodeGen/ARM/softfp-constant-comparison.ll +++ b/llvm/test/CodeGen/ARM/softfp-constant-comparison.ll @@ -43,4 +43,4 @@ land.end: ; preds = %land.rhs, %entry ret void } -attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m4" "target-features"="+armv7e-m,+dsp,+fp16,+fpregs,+hwdiv,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp,-aes,-crc,-crypto,-dotprod,-fp16fml,-fullfp16,-hwdiv-arm,-lob,-mve,-mve.fp,-ras,-sb,-sha2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 
= { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m4" "target-features"="+armv7e-m,+dsp,+fp16,+hwdiv,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp,-aes,-crc,-crypto,-dotprod,-fp16fml,-fullfp16,-hwdiv-arm,-lob,-mve,-mve.fp,-ras,-sb,-sha2" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/PowerPC/combine-fneg.ll b/llvm/test/CodeGen/PowerPC/combine-fneg.ll index 1d85f4f9680ae..14bace2f95f8b 100644 --- a/llvm/test/CodeGen/PowerPC/combine-fneg.ll +++ b/llvm/test/CodeGen/PowerPC/combine-fneg.ll @@ -5,19 +5,16 @@ define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) { ; CHECK-LABEL: fneg_fdiv_splat: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha ; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-NEXT: xxspltd 0, 1, 0 -; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l ; CHECK-NEXT: lxvd2x 1, 0, 3 -; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha ; CHECK-NEXT: xvredp 2, 0 -; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l -; CHECK-NEXT: xxswapd 1, 1 -; CHECK-NEXT: xvnmsubadp 1, 0, 2 -; CHECK-NEXT: xvmaddadp 2, 2, 1 -; CHECK-NEXT: lxvd2x 1, 0, 3 ; CHECK-NEXT: xxswapd 1, 1 +; CHECK-NEXT: xxlor 3, 1, 1 +; CHECK-NEXT: xvmaddadp 3, 0, 2 +; CHECK-NEXT: xvnmsubadp 2, 2, 3 ; CHECK-NEXT: xvmaddadp 1, 0, 2 ; CHECK-NEXT: xvmsubadp 2, 2, 1 ; CHECK-NEXT: xvmuldp 34, 34, 2 diff --git a/llvm/test/CodeGen/PowerPC/fma-combine.ll b/llvm/test/CodeGen/PowerPC/fma-combine.ll index 5baf663481d70..88da295201fea 100644 --- a/llvm/test/CodeGen/PowerPC/fma-combine.ll +++ b/llvm/test/CodeGen/PowerPC/fma-combine.ll @@ -8,14 +8,12 @@ define double @fma_combine1(double %a, double %b, double %c) { ; CHECK-FAST-LABEL: fma_combine1: ; CHECK-FAST: # %bb.0: # %entry -; CHECK-FAST-NEXT: xsnegdp 0, 3 -; CHECK-FAST-NEXT: xsmsubadp 1, 0, 2 +; CHECK-FAST-NEXT: xsnmaddadp 1, 3, 2 ; CHECK-FAST-NEXT: blr ; ; CHECK-FAST-NOVSX-LABEL: fma_combine1: ; CHECK-FAST-NOVSX: # %bb.0: # %entry -; CHECK-FAST-NOVSX-NEXT: fneg 0, 3 -; CHECK-FAST-NOVSX-NEXT: fmsub 1, 0, 2, 1 +; CHECK-FAST-NOVSX-NEXT: fnmadd 1, 3, 2, 1 ; CHECK-FAST-NOVSX-NEXT: blr ; ; CHECK-LABEL: fma_combine1: @@ -34,14 +32,12 @@ entry: define double @fma_combine2(double %a, double %b, double %c) { ; CHECK-FAST-LABEL: fma_combine2: ; CHECK-FAST: # %bb.0: # %entry -; CHECK-FAST-NEXT: xsnegdp 0, 3 -; CHECK-FAST-NEXT: xsmsubadp 1, 2, 0 +; CHECK-FAST-NEXT: xsnmaddadp 1, 2, 3 ; CHECK-FAST-NEXT: blr ; ; CHECK-FAST-NOVSX-LABEL: fma_combine2: ; CHECK-FAST-NOVSX: # %bb.0: # %entry -; CHECK-FAST-NOVSX-NEXT: fneg 0, 3 -; CHECK-FAST-NOVSX-NEXT: fmsub 1, 2, 0, 1 +; CHECK-FAST-NOVSX-NEXT: fnmadd 1, 2, 3, 1 ; CHECK-FAST-NOVSX-NEXT: blr ; ; CHECK-LABEL: fma_combine2: @@ -62,25 +58,25 @@ entry: define double @fma_combine_two_uses(double %a, double %b, double %c) { ; CHECK-FAST-LABEL: fma_combine_two_uses: ; CHECK-FAST: # %bb.0: # %entry -; CHECK-FAST-NEXT: xsnegdp 0, 3 +; CHECK-FAST-NEXT: xsnegdp 0, 1 ; CHECK-FAST-NEXT: addis 3, 2, v@toc@ha ; CHECK-FAST-NEXT: addis 4, 2, z@toc@ha -; CHECK-FAST-NEXT: xsnegdp 3, 1 -; CHECK-FAST-NEXT: xsmsubadp 1, 0, 2 -; CHECK-FAST-NEXT: stfd 0, z@toc@l(4) -; CHECK-FAST-NEXT: stfd 3, v@toc@l(3) +; CHECK-FAST-NEXT: xsnmaddadp 1, 3, 2 
+; CHECK-FAST-NEXT: xsnegdp 2, 3 +; CHECK-FAST-NEXT: stfd 0, v@toc@l(3) +; CHECK-FAST-NEXT: stfd 2, z@toc@l(4) ; CHECK-FAST-NEXT: blr ; ; CHECK-FAST-NOVSX-LABEL: fma_combine_two_uses: ; CHECK-FAST-NOVSX: # %bb.0: # %entry -; CHECK-FAST-NOVSX-NEXT: fneg 3, 3 +; CHECK-FAST-NOVSX-NEXT: fnmadd 0, 3, 2, 1 +; CHECK-FAST-NOVSX-NEXT: fneg 2, 1 ; CHECK-FAST-NOVSX-NEXT: addis 3, 2, v@toc@ha ; CHECK-FAST-NOVSX-NEXT: addis 4, 2, z@toc@ha -; CHECK-FAST-NOVSX-NEXT: fmsub 0, 3, 2, 1 -; CHECK-FAST-NOVSX-NEXT: fneg 2, 1 -; CHECK-FAST-NOVSX-NEXT: stfd 3, z@toc@l(4) +; CHECK-FAST-NOVSX-NEXT: fneg 3, 3 ; CHECK-FAST-NOVSX-NEXT: fmr 1, 0 ; CHECK-FAST-NOVSX-NEXT: stfd 2, v@toc@l(3) +; CHECK-FAST-NOVSX-NEXT: stfd 3, z@toc@l(4) ; CHECK-FAST-NOVSX-NEXT: blr ; ; CHECK-LABEL: fma_combine_two_uses: @@ -108,19 +104,17 @@ entry: define double @fma_combine_one_use(double %a, double %b, double %c) { ; CHECK-FAST-LABEL: fma_combine_one_use: ; CHECK-FAST: # %bb.0: # %entry -; CHECK-FAST-NEXT: xsnegdp 0, 3 +; CHECK-FAST-NEXT: xsnegdp 0, 1 ; CHECK-FAST-NEXT: addis 3, 2, v@toc@ha -; CHECK-FAST-NEXT: xsnegdp 3, 1 -; CHECK-FAST-NEXT: xsmsubadp 1, 0, 2 -; CHECK-FAST-NEXT: stfd 3, v@toc@l(3) +; CHECK-FAST-NEXT: xsnmaddadp 1, 3, 2 +; CHECK-FAST-NEXT: stfd 0, v@toc@l(3) ; CHECK-FAST-NEXT: blr ; ; CHECK-FAST-NOVSX-LABEL: fma_combine_one_use: ; CHECK-FAST-NOVSX: # %bb.0: # %entry -; CHECK-FAST-NOVSX-NEXT: fneg 0, 3 -; CHECK-FAST-NOVSX-NEXT: addis 3, 2, v@toc@ha -; CHECK-FAST-NOVSX-NEXT: fmsub 0, 0, 2, 1 +; CHECK-FAST-NOVSX-NEXT: fnmadd 0, 3, 2, 1 ; CHECK-FAST-NOVSX-NEXT: fneg 2, 1 +; CHECK-FAST-NOVSX-NEXT: addis 3, 2, v@toc@ha ; CHECK-FAST-NOVSX-NEXT: fmr 1, 0 ; CHECK-FAST-NOVSX-NEXT: stfd 2, v@toc@l(3) ; CHECK-FAST-NOVSX-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir b/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir index f2e576ed73b63..410f688204c31 100644 --- a/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir +++ b/llvm/test/CodeGen/PowerPC/fold-rlwinm.mir @@ -118,7 +118,7 @@ body: | %0:g8rc = COPY $x3 %1:gprc = COPY %0.sub_32:g8rc %2:gprc = RLWINM %1:gprc, 27, 5, 10 - ; CHECK: %2:gprc = RLWINM %1, 27, 5, 10 + ; CHECK-NOT: RLWINM %1, %3:gprc = RLWINM %2:gprc, 8, 5, 10 ; CHECK: %3:gprc = LI 0 BLR8 implicit $lr8, implicit $rm @@ -133,9 +133,24 @@ body: | %0:g8rc = COPY $x3 %1:gprc = COPY %0.sub_32:g8rc %2:gprc = RLWINM %1:gprc, 27, 5, 10 - ; CHECK: %2:gprc = RLWINM %1, 27, 5, 10 + ; CHECK-NOT: RLWINM %1, %3:gprc = RLWINM_rec %2:gprc, 8, 5, 10, implicit-def $cr0 - ; CHECK: %3:gprc = ANDI_rec %2, 0, implicit-def $cr0 + ; CHECK: %3:gprc = ANDI_rec %1, 0, implicit-def $cr0 BLR8 implicit $lr8, implicit $rm +... +--- +name: testFoldRLWINMoToZeroSrcCanNotBeDeleted +# CHECK: name: testFoldRLWINMoToZeroSrcCanNotBeDeleted +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3 + %0:g8rc = COPY $x3 + %1:gprc = COPY %0.sub_32:g8rc + %2:gprc = RLWINM_rec %1:gprc, 27, 5, 10, implicit-def $cr0 + ; CHECK: %2:gprc = RLWINM_rec %1, 27, 5, 10, implicit-def $cr0 + %3:gprc = RLWINM_rec %2:gprc, 8, 5, 10, implicit-def $cr0 + ; CHECK: %3:gprc = ANDI_rec %1, 0, implicit-def $cr0 BLR8 implicit $lr8, implicit $rm ...
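+# Unlike the plain RLWINM case above, the source RLWINM_rec cannot be deleted
+# here: it also defines $cr0, which may still be live, so only the second
+# rotate is expected to fold into ANDI_rec %1.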
--- diff --git a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll index 3bfd92a2e5b36..246bec1918ef2 100644 --- a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll +++ b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll @@ -229,8 +229,8 @@ define <4 x double> @foo2_fmf(<4 x double> %a, <4 x double> %b) nounwind { ; CHECK-NEXT: qvfre 3, 2 ; CHECK-NEXT: addi 3, 3, .LCPI8_0@toc@l ; CHECK-NEXT: qvlfdx 0, 0, 3 -; CHECK-NEXT: qvfnmsub 0, 2, 3, 0 -; CHECK-NEXT: qvfmadd 0, 3, 0, 3 +; CHECK-NEXT: qvfmadd 0, 2, 3, 0 +; CHECK-NEXT: qvfnmsub 0, 3, 0, 3 ; CHECK-NEXT: qvfmul 3, 1, 0 ; CHECK-NEXT: qvfnmsub 1, 2, 3, 1 ; CHECK-NEXT: qvfmadd 1, 0, 1, 3 diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll index bc33617662e47..a0afb4b6e12dc 100644 --- a/llvm/test/CodeGen/PowerPC/recipest.ll +++ b/llvm/test/CodeGen/PowerPC/recipest.ll @@ -194,8 +194,8 @@ define <4 x float> @hoo_safe(<4 x float> %a, <4 x float> %b) nounwind { define double @foo2_fmf(double %a, double %b) nounwind { ; CHECK: @foo2_fmf ; CHECK-DAG: fre -; CHECK-DAG: fnmsub -; CHECK: fmadd +; CHECK-DAG: fmadd +; CHECK: fnmsub ; CHECK-NEXT: fmul ; CHECK-NEXT: fnmsub ; CHECK-NEXT: fmadd diff --git a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll index 9ab320cd1eacf..2cdf832838a8d 100644 --- a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll +++ b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll @@ -13,9 +13,9 @@ define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) { ; CHECK-NEXT: lvx 4, 0, 3 ; CHECK-NEXT: xxspltw 0, 0, 0 ; CHECK-NEXT: xvresp 1, 0 -; CHECK-NEXT: xvnmsubasp 35, 0, 1 +; CHECK-NEXT: xvmaddasp 35, 0, 1 ; CHECK-NEXT: xvmulsp 0, 34, 36 -; CHECK-NEXT: xvmaddasp 1, 1, 35 +; CHECK-NEXT: xvnmsubasp 1, 1, 35 ; CHECK-NEXT: xvmulsp 34, 0, 1 ; CHECK-NEXT: blr %ins = insertelement <4 x float> undef, float %a, i32 0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir index 976c5f5d7ba36..6dd8caafc33e7 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir @@ -60,7 +60,7 @@ declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #3 declare void @llvm.stackprotector(i8*, i8**) #5 - attributes #0 = { nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+fp-armv8d16sp,+fp16,+fpregs,+fullfp16,+hwdiv,+lob,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp" "unsafe-fp-math"="true" "use-soft-float"="false" } + attributes #0 = { nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" 
"target-features"="+armv8.1-m.main,+fp-armv8d16sp,+fp16,+fullfp16,+hwdiv,+lob,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp" "unsafe-fp-math"="true" "use-soft-float"="false" } attributes #1 = { noduplicate nounwind } attributes #2 = { nounwind readnone } attributes #3 = { argmemonly nounwind willreturn } diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update2.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update2.mir index 9e429040db4fd..d49a1e86109b8 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update2.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update2.mir @@ -62,7 +62,7 @@ declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #3 declare void @llvm.stackprotector(i8*, i8**) #5 - attributes #0 = { nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+fp-armv8d16sp,+fp16,+fpregs,+fullfp16,+hwdiv,+lob,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp" "unsafe-fp-math"="true" "use-soft-float"="false" } + attributes #0 = { nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+fp-armv8d16sp,+fp16,+fullfp16,+hwdiv,+lob,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp" "unsafe-fp-math"="true" "use-soft-float"="false" } attributes #1 = { noduplicate nounwind } attributes #2 = { nounwind readnone } attributes #3 = { argmemonly nounwind willreturn } diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update3.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update3.mir index ab7fcf843d7dc..bf1c40fb34e5f 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update3.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update3.mir @@ -62,7 +62,7 @@ declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #3 declare void @llvm.stackprotector(i8*, i8**) #5 - attributes #0 = { nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+fp-armv8d16sp,+fp16,+fpregs,+fullfp16,+hwdiv,+lob,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp" "unsafe-fp-math"="true" "use-soft-float"="false" } + attributes #0 = { nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" 
"no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+fp-armv8d16sp,+fp16,+fullfp16,+hwdiv,+lob,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp" "unsafe-fp-math"="true" "use-soft-float"="false" } attributes #1 = { noduplicate nounwind } attributes #2 = { nounwind readnone } attributes #3 = { argmemonly nounwind willreturn } diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll index ddf51b785ff0d..e25e0298eb243 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fpregs,+fullfp16 -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fullfp16 -disable-mve-tail-predication=false %s -o - | FileCheck %s define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocapture readonly %b, float* nocapture readonly %c, i32 %N) { ; CHECK-LABEL: fast_float_mul: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll index ebb041d937224..b152191798bcf 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fpregs,+fullfp16 %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fullfp16 %s -o - | FileCheck %s define arm_aapcs_vfpcc void @float_float_mul(float* nocapture readonly %a, float* nocapture readonly %b, float* nocapture %c, i32 %N) { ; CHECK-LABEL: float_float_mul: diff --git a/llvm/test/CodeGen/VE/lit.local.cfg b/llvm/test/CodeGen/VE/lit.local.cfg new file mode 100644 index 0000000000000..b6366779272df --- /dev/null +++ b/llvm/test/CodeGen/VE/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'VE' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/CodeGen/VE/target_support.ll b/llvm/test/CodeGen/VE/target_support.ll new file mode 100644 index 0000000000000..336d9cd367208 --- /dev/null +++ b/llvm/test/CodeGen/VE/target_support.ll @@ -0,0 +1,2 @@ +; RUN: llc --version | FileCheck %s +; CHECK: ve - VE diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll index a5fd84c32ed51..e4a5d1392c0bf 100644 --- a/llvm/test/CodeGen/X86/avg.ll +++ b/llvm/test/CodeGen/X86/avg.ll @@ -462,14 +462,12 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind { ; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 ; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 ; AVX512BW-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512BW-NEXT: vpavgb 16(%rsi), %xmm1, %xmm1 +; AVX512BW-NEXT: vpavgb 32(%rsi), %xmm2, %xmm2 ; AVX512BW-NEXT: vpavgb (%rsi), %xmm0, %xmm0 -; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpavgb 32(%rsi), %xmm2, %xmm1 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1 -; AVX512BW-NEXT: vmovdqu %ymm0, (%rax) -; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, (%rax) -; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: vpavgb 16(%rsi), %xmm1, %xmm1 +; AVX512BW-NEXT: 
vmovdqu %xmm1, (%rax) +; AVX512BW-NEXT: vmovdqu %xmm0, (%rax) +; AVX512BW-NEXT: vmovdqu %xmm2, (%rax) ; AVX512BW-NEXT: retq %1 = load <48 x i8>, <48 x i8>* %a %2 = load <48 x i8>, <48 x i8>* %b diff --git a/llvm/test/CodeGen/X86/copy-eflags-liveinlists.mir b/llvm/test/CodeGen/X86/copy-eflags-liveinlists.mir new file mode 100644 index 0000000000000..54454fe0017f1 --- /dev/null +++ b/llvm/test/CodeGen/X86/copy-eflags-liveinlists.mir @@ -0,0 +1,92 @@ +# RUN: llc -mtriple=i686-unknown-unknown -run-pass=x86-flags-copy-lowering \ +# RUN: -print-after=x86-flags-copy-lowering %s -o - | FileCheck %s +# +# Check that $eflags is removed from live-in lists of successor blocks. +# +# CHECK-NOT: liveins: $eflags + +--- | + define void @fun(i16 %arg, i64 %arg1, i8 %arg2, i8* %arg3, i32 %arg4) { ret void} +... +--- +name: fun +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr16 } + - { id: 1, class: gr16 } + - { id: 2, class: gr16 } + - { id: 3, class: gr32 } + - { id: 4, class: gr32 } + - { id: 5, class: gr8 } + - { id: 6, class: gr32 } + - { id: 7, class: gr32 } + - { id: 8, class: gr32 } + - { id: 9, class: gr32 } + - { id: 10, class: gr32 } + - { id: 11, class: gr32 } + - { id: 12, class: gr32 } + - { id: 13, class: gr8 } + - { id: 14, class: gr32 } + - { id: 15, class: gr32 } + - { id: 16, class: gr32_abcd } + - { id: 17, class: gr8 } + - { id: 18, class: gr8 } + - { id: 19, class: gr32 } + - { id: 20, class: gr32 } +frameInfo: + maxAlignment: 4 +fixedStack: + - { id: 0, offset: 20, size: 4, alignment: 4, isImmutable: true } + - { id: 1, offset: 16, size: 4, alignment: 4, isImmutable: true } + - { id: 2, offset: 12, size: 1, alignment: 4, isImmutable: true } + - { id: 3, offset: 8, size: 4, alignment: 4, isImmutable: true } + - { id: 4, offset: 4, size: 4, alignment: 4, isImmutable: true } + - { id: 5, size: 2, alignment: 4, isImmutable: true } +machineFunctionInfo: {} +body: | + bb.0: + %4:gr32 = MOV32rm %fixed-stack.3, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.3) + %3:gr32 = MOV32rm %fixed-stack.4, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.4) + %7:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0) + %6:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.1) + %5:gr8 = MOV8rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 1 from %fixed-stack.2, align 4) + %9:gr32 = IMPLICIT_DEF + %11:gr32 = IMPLICIT_DEF + + bb.1: + successors: %bb.2, %bb.3 + + CMP32rr %3, %9, implicit-def $eflags + %10:gr32 = SBB32rr %4, %11, implicit-def $eflags, implicit $eflags + %12:gr32 = COPY $eflags + %13:gr8 = SETCCr 12, implicit $eflags + %14:gr32 = MOVZX32rr8 killed %13 + %15:gr32 = NEG32r %14, implicit-def dead $eflags + %16:gr32_abcd = MOV32r0 implicit-def dead $eflags + $eflags = COPY %12 + %17:gr8 = COPY %16.sub_8bit + JCC_1 %bb.3, 12, implicit $eflags + + bb.2: + liveins: $eflags + + + bb.3: + successors: %bb.4, %bb.5 + liveins: $eflags + + %18:gr8 = PHI %5, %bb.2, %17, %bb.1 + MOV8mr %6, 1, $noreg, 0, $noreg, killed %18 :: (volatile store 1 into %ir.arg3) + JCC_1 %bb.5, 12, implicit $eflags + + bb.4: + + bb.5: + %19:gr32 = PHI %16, %bb.4, %15, %bb.3 + $eax = COPY %7 + CDQ implicit-def $eax, implicit-def $edx, implicit $eax + IDIV32r killed %19, implicit-def dead $eax, implicit-def $edx, implicit-def dead $eflags, implicit $eax, implicit $edx + JMP_1 %bb.1 + +... 
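+# bb.2 and bb.3 start out with $eflags in their live-in lists; after
+# x86-flags-copy-lowering rewrites the SBB32rr/SETCCr/JCC_1 users of the
+# copied flags, they should not, which is what the CHECK-NOT at the top
+# of the file verifies.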
diff --git a/llvm/test/CodeGen/X86/pr34657.ll b/llvm/test/CodeGen/X86/pr34657.ll index d8b72920fed15..9761927dc239b 100644 --- a/llvm/test/CodeGen/X86/pr34657.ll +++ b/llvm/test/CodeGen/X86/pr34657.ll @@ -5,13 +5,12 @@ define <112 x i8> @pr34657(<112 x i8>* %src) local_unnamed_addr { ; CHECK-LABEL: pr34657: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: vmovups 64(%rsi), %ymm0 -; CHECK-NEXT: vbroadcastf128 {{.*#+}} ymm1 = mem[0,1,0,1] -; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm1 -; CHECK-NEXT: vmovups (%rsi), %zmm2 -; CHECK-NEXT: vmovaps %ymm0, 64(%rdi) -; CHECK-NEXT: vmovaps %zmm2, (%rdi) -; CHECK-NEXT: vextractf32x4 $2, %zmm1, 96(%rdi) +; CHECK-NEXT: vmovups (%rsi), %zmm0 +; CHECK-NEXT: vmovups 64(%rsi), %ymm1 +; CHECK-NEXT: vmovups 96(%rsi), %xmm2 +; CHECK-NEXT: vmovaps %xmm2, 96(%rdi) +; CHECK-NEXT: vmovaps %ymm1, 64(%rdi) +; CHECK-NEXT: vmovaps %zmm0, (%rdi) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll index fcdebfa68a5e7..74a83214bf208 100644 --- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll +++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll @@ -1055,64 +1055,24 @@ ret void } define void @interleaved_store_vf16_i8_stride3(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <48 x i8>* %p) { -; AVX1-LABEL: interleaved_store_vf16_i8_stride3: -; AVX1: # %bb.0: -; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] -; AVX1-NEXT: vpalignr {{.*#+}} xmm3 = xmm1[11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10] -; AVX1-NEXT: vpalignr {{.*#+}} xmm4 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4] -; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4] -; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4] -; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4] -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5] -; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4] -; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4] -; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vmovdqu %xmm0, 16(%rdi) -; AVX1-NEXT: vmovdqu %xmm1, (%rdi) -; AVX1-NEXT: vmovdqu %xmm2, 32(%rdi) -; AVX1-NEXT: retq -; -; AVX2-LABEL: interleaved_store_vf16_i8_stride3: -; AVX2: # %bb.0: -; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] -; AVX2-NEXT: vpalignr {{.*#+}} xmm3 = xmm1[11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10] -; AVX2-NEXT: vpalignr {{.*#+}} xmm4 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4] -; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4] -; AVX2-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4] -; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4] -; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5] -; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4] -; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX2-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4] -; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vmovdqu %xmm0, 16(%rdi) -; AVX2-NEXT: vmovdqu %xmm1, (%rdi) -; AVX2-NEXT: vmovdqu %xmm2, 32(%rdi) -; 
AVX2-NEXT: retq -; -; AVX512-LABEL: interleaved_store_vf16_i8_stride3: -; AVX512: # %bb.0: -; AVX512-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] -; AVX512-NEXT: vpalignr {{.*#+}} xmm3 = xmm1[11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10] -; AVX512-NEXT: vpalignr {{.*#+}} xmm4 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4] -; AVX512-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4] -; AVX512-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4] -; AVX512-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4] -; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5] -; AVX512-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX512-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4] -; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX512-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4] -; AVX512-NEXT: vpshufb %xmm3, %xmm2, %xmm2 -; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm1 -; AVX512-NEXT: vmovdqu %ymm0, (%rdi) -; AVX512-NEXT: vextracti32x4 $2, %zmm1, 32(%rdi) -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX-LABEL: interleaved_store_vf16_i8_stride3: +; AVX: # %bb.0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5] +; AVX-NEXT: vpalignr {{.*#+}} xmm3 = xmm1[11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10] +; AVX-NEXT: vpalignr {{.*#+}} xmm4 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4] +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4] +; AVX-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4] +; AVX-NEXT: vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4] +; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5] +; AVX-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4] +; AVX-NEXT: vpshufb %xmm3, %xmm0, %xmm0 +; AVX-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4] +; AVX-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX-NEXT: vmovdqu %xmm0, 16(%rdi) +; AVX-NEXT: vmovdqu %xmm1, (%rdi) +; AVX-NEXT: vmovdqu %xmm2, 32(%rdi) +; AVX-NEXT: retq %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <32 x i32> %2 = shufflevector <16 x i8> %c, <16 x i8> undef, <32 x i32> %interleaved.vec = shufflevector <32 x i8> %1, <32 x i8> %2, <48 x i32> diff --git a/llvm/test/MachineVerifier/live-ins-01.mir b/llvm/test/MachineVerifier/live-ins-01.mir new file mode 100644 index 0000000000000..51c05dacf0558 --- /dev/null +++ b/llvm/test/MachineVerifier/live-ins-01.mir @@ -0,0 +1,57 @@ +# RUN: not llc -o - %s -mtriple=s390x-linux-gnu -mcpu=z14 -run-pass none 2>&1 | FileCheck %s +# REQUIRES: systemz-registered-target + +# Test that the machine verifier reports an error when a register in +# liveins is not live out from the predecessor. + +--- +name: f1 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $r2l, $r3l + + %1:gr32bit = COPY $r3l + %0:gr32bit = COPY $r2l + CHIMux %0, 0, implicit-def $cc + + bb.1: + liveins: $cc + + bb.2: + liveins: $cc + + %2:grx32bit = LOCRMux %1, %0, 14, 8, implicit $cc + $r2l = COPY %2 + Return implicit $r2l +... + +# CHECK: *** Bad machine code: Live in register not found to be live out from predecessor.
*** +# CHECK:- function: f2 +# CHECK:- basic block: %bb.2 +# CHECK:CC not found to be live out from %bb.1 --- +name: f2 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $r2l, $r3l + + %1:gr32bit = COPY $r3l + %0:gr32bit = COPY $r2l + CHIMux %0, 0, implicit-def $cc + + bb.1: + liveins: $cc + KILL killed $cc + + bb.2: + liveins: $cc + + %2:grx32bit = LOCRMux %1, %0, 14, 8, implicit $cc + $r2l = COPY %2 + Return implicit $r2l + +... diff --git a/llvm/test/MachineVerifier/live-ins-02.mir b/llvm/test/MachineVerifier/live-ins-02.mir new file mode 100644 index 0000000000000..d76325cdd1082 --- /dev/null +++ b/llvm/test/MachineVerifier/live-ins-02.mir @@ -0,0 +1,32 @@ +# RUN: not llc -o - %s -mtriple=s390x-linux-gnu -mcpu=z14 -run-pass none 2>&1 | FileCheck %s +# REQUIRES: systemz-registered-target + +# Test that the machine verifier reports an error when a register in +# liveins is not live out from the predecessor. + +--- +name: f1 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $r2l, $r3l + + %1:gr32bit = COPY $r3l + %0:gr32bit = COPY $r2l + CHIMux %0, 0, implicit-def $cc + + bb.1: + + bb.2: + liveins: $cc + + %2:grx32bit = LOCRMux %1, %0, 14, 8, implicit $cc + $r2l = COPY %2 + Return implicit $r2l +... + +# CHECK: *** Bad machine code: Live in register not found to be live out from predecessor. *** +# CHECK:- function: f1 +# CHECK:- basic block: %bb.2 +# CHECK:CC not found to be live out from %bb.1 diff --git a/llvm/test/MachineVerifier/live-ins-03.mir b/llvm/test/MachineVerifier/live-ins-03.mir new file mode 100644 index 0000000000000..b5345ccdc3b63 --- /dev/null +++ b/llvm/test/MachineVerifier/live-ins-03.mir @@ -0,0 +1,36 @@ +# RUN: not llc -o - %s -mtriple=s390x-linux-gnu -mcpu=z14 -run-pass none 2>&1 | FileCheck %s +# REQUIRES: systemz-registered-target + +# Test that the machine verifier reports an error when a register in +# liveins is not live out from the predecessor. + +--- +name: f1 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $r2l, $r3l + + %1:gr32bit = COPY $r3l + %0:gr32bit = COPY $r2l + CHIMux %0, 0, implicit-def $cc + + bb.1: + liveins: $cc + BRC 14, 8, %bb.3, implicit $cc + + bb.2: + + bb.3: + liveins: $cc + + %2:grx32bit = LOCRMux %1, %0, 14, 8, implicit $cc + $r2l = COPY %2 + Return implicit $r2l +... + +# CHECK: *** Bad machine code: Live in register not found to be live out from predecessor.
*** +# CHECK:- function: f1 +# CHECK:- basic block: %bb.3 +# CHECK:CC not found to be live out from %bb.2 diff --git a/llvm/test/TableGen/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter.td index 41c825950ca2f..59816497cc937 100644 --- a/llvm/test/TableGen/GlobalISelEmitter.td +++ b/llvm/test/TableGen/GlobalISelEmitter.td @@ -210,7 +210,7 @@ def HasC : Predicate<"Subtarget->hasC()"> { let RecomputePerFunction = 1; } // CHECK-NEXT: }; // CHECK-NEXT: MyTargetInstructionSelector::CustomRendererFn // CHECK-NEXT: MyTargetInstructionSelector::CustomRenderers[] = { -// CHECK-NEXT: nullptr, // GICP_Invalid +// CHECK-NEXT: nullptr, // GICR_Invalid // CHECK-NEXT: &MyTargetInstructionSelector::renderImm8, // gi_cimm8 // CHECK-NEXT: }; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll index 87a3ba5811e16..db3db632e5f24 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll @@ -11,10 +11,6 @@ define void @zot() personality i32 (...)* @wibble { ; ATTRIBUTOR-NEXT: bb: ; ATTRIBUTOR-NEXT: call void @hoge() ; ATTRIBUTOR-NEXT: unreachable -; ATTRIBUTOR: bb.split: -; ATTRIBUTOR-NEXT: unreachable -; ATTRIBUTOR: bb1.i2c: -; ATTRIBUTOR-NEXT: unreachable ; ATTRIBUTOR: bb1: ; ATTRIBUTOR-NEXT: unreachable ; ATTRIBUTOR: bb2: @@ -47,8 +43,6 @@ define internal void @hoge() { ; ATTRIBUTOR-LABEL: define {{[^@]+}}@hoge() ; ATTRIBUTOR-NEXT: bb: ; ATTRIBUTOR-NEXT: unreachable -; ATTRIBUTOR: bb.split: -; ATTRIBUTOR-NEXT: unreachable ; bb: %tmp = call fastcc i8* @spam(i1 (i8*)* @eggs) @@ -77,8 +71,6 @@ define i32 @test_inf_promote_caller(i32 %arg) { ; CHECK-SAME: (i32 [[ARG:%.*]]) ; CHECK-NEXT: bb: ; CHECK-NEXT: unreachable -; CHECK: bb.split: -; CHECK-NEXT: unreachable ; bb: %tmp = alloca %S diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll index 36adfe08a4d20..153ce6893ba2a 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll @@ -17,8 +17,6 @@ define void @run() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @CaptureAStruct(%struct.Foo* nofree nonnull readonly align 8 dereferenceable(16) @a) ; CHECK-NEXT: unreachable -; CHECK: entry.split: -; CHECK-NEXT: unreachable ; entry: tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll index b81d35491f944..a5ca51e9bd996 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes ; RUN: opt -S -basicaa -attributor -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=3 < %s | FileCheck %s --check-prefixes=CHECK,OLDPM_MODULE ; RUN: opt -S -passes='attributor' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=3 < %s | FileCheck %s --check-prefixes=CHECK,NEWPM_MODULE diff --git 
a/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll index 271854a224569..d08969c0a2620 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll @@ -13,8 +13,6 @@ define i32 @bar() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = call addrspace(1) i32 @foo() ; CHECK-NEXT: unreachable -; CHECK: entry.split: -; CHECK-NEXT: unreachable ; entry: diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/recursion.ll b/llvm/test/Transforms/Attributor/IPConstantProp/recursion.ll index b9fd0468d380a..fc82342a989f6 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/recursion.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/recursion.ll @@ -12,8 +12,6 @@ define internal i32 @foo(i32 %X) { define void @bar() { ; CHECK-LABEL: define {{[^@]+}}@bar() ; CHECK-NEXT: unreachable -; CHECK: .split: -; CHECK-NEXT: unreachable ; call i32 @foo( i32 17 ) ; :1 [#uses=0] ret void } diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/return-constant.ll b/llvm/test/Transforms/Attributor/IPConstantProp/return-constant.ll index 04927726daa25..f30461c746af8 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/return-constant.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/return-constant.ll @@ -8,8 +8,6 @@ define i1 @invokecaller(i1 %C) personality i32 (...)* @__gxx_personality_v0 { ; CHECK-SAME: (i1 [[C:%.*]]) #0 personality i32 (...)* @__gxx_personality_v0 ; CHECK-NEXT: [[X:%.*]] = call i32 @foo(i1 [[C]]) ; CHECK-NEXT: br label [[OK:%.*]] -; CHECK: .i2c: -; CHECK-NEXT: unreachable ; CHECK: OK: ; CHECK-NEXT: [[Y:%.*]] = icmp ne i32 52, 0 ; CHECK-NEXT: ret i1 [[Y]] diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll index cf9dc8b789dd3..4dd37865fcc09 100644 --- a/llvm/test/Transforms/Attributor/liveness.ll +++ b/llvm/test/Transforms/Attributor/liveness.ll @@ -277,6 +277,96 @@ cleanup: ret i32 0 } +; UTC_ARGS: --turn on + +; TEST 5.4 nounwind invoke instruction replaced by a call and a branch instruction put after it.
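+; The invoke of @foo_nounwind below cannot unwind, so it is expected to be
+; rewritten into a plain call plus a branch to %continue, leaving the
+; %cleanup landing pad unreachable (see the CHECK lines).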
+define i32 @invoke_nounwind_phi(i32 %a) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: define {{[^@]+}}@invoke_nounwind_phi +; CHECK-SAME: (i32 [[A:%.*]]) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: call void @normal_call() +; CHECK-NEXT: [[CALL:%.*]] = call i32 @foo_nounwind() +; CHECK-NEXT: br label [[CONTINUE:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: call void @normal_call() +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @bar() +; CHECK-NEXT: br label [[CONTINUE]] +; CHECK: continue: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ 1, [[COND_FALSE]] ] +; CHECK-NEXT: ret i32 [[P]] +; CHECK: cleanup: +; CHECK-NEXT: unreachable +; +entry: + %cmp = icmp eq i32 %a, 0 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + call void @normal_call() + %call = invoke i32 @foo_nounwind() to label %continue + unwind label %cleanup + +cond.false: ; preds = %entry + call void @normal_call() + %call1 = call i32 @bar() + br label %continue + +continue: + %p = phi i32 [ 0, %cond.true ], [ 1, %cond.false ] + ret i32 %p + +cleanup: + %res = landingpad { i8*, i32 } catch i8* null + ret i32 0 +} + +; TEST 5.5 nounwind invoke instruction replaced by a call and a branch instruction put after it. +define i32 @invoke_nounwind_phi_dom(i32 %a) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: define {{[^@]+}}@invoke_nounwind_phi_dom +; CHECK-SAME: (i32 [[A:%.*]]) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: call void @normal_call() +; CHECK-NEXT: [[CALL:%.*]] = call i32 @foo_nounwind() +; CHECK-NEXT: br label [[CONTINUE:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: call void @normal_call() +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @bar() +; CHECK-NEXT: br label [[CONTINUE]] +; CHECK: continue: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[CALL]], [[COND_TRUE]] ], [ [[CALL1]], [[COND_FALSE]] ] +; CHECK-NEXT: ret i32 [[P]] +; CHECK: cleanup: +; CHECK-NEXT: unreachable +; +entry: + %cmp = icmp eq i32 %a, 0 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + call void @normal_call() + %call = invoke i32 @foo_nounwind() to label %continue + unwind label %cleanup + +cond.false: ; preds = %entry + call void @normal_call() + %call1 = call i32 @bar() + br label %continue + +continue: + %p = phi i32 [ %call, %cond.true ], [ %call1, %cond.false ] + ret i32 %p + +cleanup: + %res = landingpad { i8*, i32 } catch i8* null + ret i32 0 +} + ; UTC_ARGS: --turn off ; TEST 6: Undefined behavior, taken from LangRef.
@@ -707,7 +797,6 @@ define internal void @dead_e2() { ret void } ; CHECK-NEXT: define internal void @non_dead_d15() ; CHECK-NOT: define internal void @dead_e - declare void @blowup() noreturn define void @live_with_dead_entry() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; CHECK: define void @live_with_dead_entry( @@ -735,19 +824,19 @@ define void @live_with_dead_entry_lp() personality i8* bitcast (i32 (...)* @__gx ; CHECK: define void @live_with_dead_entry_lp( ; CHECK-NEXT: entry: ; CHECK-NEXT: invoke void @blowup() -; CHECK-NEXT: to label %live_with_dead_entry.dead unwind label %lp1 -; CHECK: lp1: ; preds = %entry +; CHECK-NEXT: to label %[[LIVE_WITH_DEAD_ENTRY_DEAD1:.*]] unwind label %[[LP1:.*]] +; CHECK: [[LP1]]: ; preds = %entry ; CHECK-NEXT: %lp = landingpad { i8*, i32 } ; CHECK-NEXT: catch i8* null ; CHECK-NEXT: invoke void @blowup() -; CHECK-NEXT: to label %live_with_dead_entry.dead1 unwind label %lp2 -; CHECK: lp2: ; preds = %lp1 +; CHECK-NEXT: to label %[[LIVE_WITH_DEAD_ENTRY_DEAD2:.*]] unwind label %[[LP2:.*]] +; CHECK: [[LP2]]: ; preds = %lp1 ; CHECK-NEXT: %0 = landingpad { i8*, i32 } ; CHECK-NEXT: catch i8* null ; CHECK-NEXT: br label %live_with_dead_entry -; CHECK: live_with_dead_entry.dead: +; CHECK: [[LIVE_WITH_DEAD_ENTRY_DEAD1]]: ; CHECK-NEXT: unreachable -; CHECK: live_with_dead_entry.dead1: +; CHECK: [[LIVE_WITH_DEAD_ENTRY_DEAD2]]: ; CHECK-NEXT: unreachable ; CHECK: live_with_dead_entry: ; preds = %lp2 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/noreturn_async.ll b/llvm/test/Transforms/Attributor/noreturn_async.ll index 7c00a5a0b5cbf..9fb99159acf5d 100644 --- a/llvm/test/Transforms/Attributor/noreturn_async.ll +++ b/llvm/test/Transforms/Attributor/noreturn_async.ll @@ -100,9 +100,9 @@ define dso_local i32 @"?catchoverflow@@YAHXZ_may_throw"() personality i8* bitca entry: %retval = alloca i32, align 4 %__exception_code = alloca i32, align 4 -; CHECK: invoke void @"?overflow@@YAXXZ_may_throw"() +; CHECK: invoke void @"?overflow@@YAXXZ_may_throw"() ; CHECK: to label %invoke.cont unwind label %catch.dispatch - invoke void @"?overflow@@YAXXZ_may_throw"() + invoke void @"?overflow@@YAXXZ_may_throw"() to label %invoke.cont unwind label %catch.dispatch invoke.cont: ; preds = %entry diff --git a/llvm/test/Transforms/Attributor/undefined_behavior.ll b/llvm/test/Transforms/Attributor/undefined_behavior.ll index e9b782452182f..fd0ddb1ebb850 100644 --- a/llvm/test/Transforms/Attributor/undefined_behavior.ll +++ b/llvm/test/Transforms/Attributor/undefined_behavior.ll @@ -16,6 +16,16 @@ define void @load_wholly_unreachable() { ret void } +define void @loads_wholly_unreachable() { +; ATTRIBUTOR-LABEL: @loads_wholly_unreachable( +; ATTRIBUTOR-NEXT: unreachable +; + %a = load i32, i32* null + %b = load i32, i32* null + ret void +} + + define void @load_single_bb_unreachable(i1 %cond) { ; ATTRIBUTOR-LABEL: @load_single_bb_unreachable( ; ATTRIBUTOR-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[E:%.*]] diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll index 8bfeb67f6e066..ec1119cb24573 100644 --- a/llvm/test/Transforms/InstCombine/fdiv.ll +++ b/llvm/test/Transforms/InstCombine/fdiv.ll @@ -187,17 +187,17 @@ define float @div_with_div_denominator_extra_use(float %x, float %y, float %z) { ret float %div2 } -; Z / (1.0 / Y) +; Z / (1.0 / Y) ==> Y * Z define float @div_with_div_denominator_with_one_as_numerator_extra_use(float %x, float %y, float %z) { ; CHECK-LABEL: 
@div_with_div_denominator_with_one_as_numerator_extra_use( ; CHECK-NEXT: [[DIV1:%.*]] = fdiv float 1.000000e+00, [[Y:%.*]] -; CHECK-NEXT: [[DIV2:%.*]] = fdiv fast float [[Z:%.*]], [[DIV1]] +; CHECK-NEXT: [[DIV2:%.*]] = fmul reassoc arcp float [[Y]], [[Z:%.*]] ; CHECK-NEXT: call void @use_f32(float [[DIV1]]) ; CHECK-NEXT: ret float [[DIV2]] ; %div1 = fdiv float 1.0, %y - %div2 = fdiv fast float %z, %div1 + %div2 = fdiv reassoc arcp float %z, %div1 call void @use_f32(float %div1) ret float %div2 } diff --git a/llvm/test/Transforms/InstSimplify/select.ll b/llvm/test/Transforms/InstSimplify/select.ll index 5581a3bd74719..4fe499ad4a24e 100644 --- a/llvm/test/Transforms/InstSimplify/select.ll +++ b/llvm/test/Transforms/InstSimplify/select.ll @@ -3,8 +3,7 @@ define i1 @bool_true_or_false(i1 %cond) { ; CHECK-LABEL: @bool_true_or_false( -; CHECK-NEXT: [[S:%.*]] = select i1 [[COND:%.*]], i1 true, i1 false -; CHECK-NEXT: ret i1 [[S]] +; CHECK-NEXT: ret i1 [[COND:%.*]] ; %s = select i1 %cond, i1 true, i1 false ret i1 %s @@ -12,8 +11,7 @@ define i1 @bool_true_or_false(i1 %cond) { define <2 x i1> @bool_true_or_false_vec(<2 x i1> %cond) { ; CHECK-LABEL: @bool_true_or_false_vec( -; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i1> , <2 x i1> zeroinitializer -; CHECK-NEXT: ret <2 x i1> [[S]] +; CHECK-NEXT: ret <2 x i1> [[COND:%.*]] ; %s = select <2 x i1> %cond, <2 x i1> , <2 x i1> zeroinitializer ret <2 x i1> %s @@ -21,8 +19,7 @@ define <2 x i1> @bool_true_or_false_vec(<2 x i1> %cond) { define <2 x i1> @bool_true_or_false_vec_undef(<2 x i1> %cond) { ; CHECK-LABEL: @bool_true_or_false_vec_undef( -; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i1> , <2 x i1> -; CHECK-NEXT: ret <2 x i1> [[S]] +; CHECK-NEXT: ret <2 x i1> [[COND:%.*]] ; %s = select <2 x i1> %cond, <2 x i1> , <2 x i1> ret <2 x i1> %s diff --git a/llvm/test/Transforms/LoopUnroll/ARM/mve-nounroll.ll b/llvm/test/Transforms/LoopUnroll/ARM/mve-nounroll.ll index 2067969a994ff..15016c1f7e7d9 100644 --- a/llvm/test/Transforms/LoopUnroll/ARM/mve-nounroll.ll +++ b/llvm/test/Transforms/LoopUnroll/ARM/mve-nounroll.ll @@ -121,6 +121,55 @@ for.cond.cleanup: ; preds = %for.cond1.for.cond. ret void } +; Test that we don't unroll loops that only contain vector intrinsics. 
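+; The loop below is predicated through @llvm.arm.mve.vctp8 and masked
+; loads/stores, so unrolling should give no benefit; the CHECK-NOT ensures
+; only one predicated sub survives.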
+; CHECK-LABEL: test_intrinsics +; CHECK: call <16 x i8> @llvm.arm.mve.sub +; CHECK-NOT: call <16 x i8> @llvm.arm.mve.sub +define dso_local arm_aapcs_vfpcc void @test_intrinsics(i8* noalias nocapture readonly %a, i8* noalias nocapture readonly %b, i8* noalias nocapture %c, i32 %N) { +entry: + %cmp8 = icmp eq i32 %N, 0 + %tmp8 = add i32 %N, 15 + %tmp9 = lshr i32 %tmp8, 4 + %tmp10 = shl nuw i32 %tmp9, 4 + %tmp11 = add i32 %tmp10, -16 + %tmp12 = lshr i32 %tmp11, 4 + %tmp13 = add nuw nsw i32 %tmp12, 1 + br i1 %cmp8, label %for.cond.cleanup, label %vector.ph + +vector.ph: + br label %vector.body + +vector.body: + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %tmp14 = phi i32 [ %tmp13, %vector.ph ], [ %tmp15, %vector.body ] + %0 = phi i32 [ %N, %vector.ph ], [ %2, %vector.body ] + %tmp = getelementptr inbounds i8, i8* %a, i32 %index + %1 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %0) + %2 = sub i32 %0, 16 + %tmp2 = bitcast i8* %tmp to <16 x i8>* + %wide.masked.load = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %tmp2, i32 4, <16 x i1> %1, <16 x i8> undef) + %tmp3 = getelementptr inbounds i8, i8* %b, i32 %index + %tmp4 = bitcast i8* %tmp3 to <16 x i8>* + %wide.masked.load2 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %tmp4, i32 4, <16 x i1> %1, <16 x i8> undef) + %sub = call <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8> %wide.masked.load2, <16 x i8> %wide.masked.load, <16 x i1> %1, <16 x i8> undef) + %tmp6 = getelementptr inbounds i8, i8* %c, i32 %index + %tmp7 = bitcast i8* %tmp6 to <16 x i8>* + tail call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %sub, <16 x i8>* %tmp7, i32 4, <16 x i1> %1) + %index.next = add i32 %index, 16 + %tmp15 = sub i32 %tmp14, 1 + %tmp16 = icmp ne i32 %tmp15, 0 + br i1 %tmp16, label %vector.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %vector.body, %entry + ret void +} + +declare <16 x i1> @llvm.arm.mve.vctp8(i32) +declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>) +declare <16 x i8> @llvm.arm.mve.sub.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) +declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>) + + !0 = distinct !{!0, !1} !1 = !{!"llvm.loop.isvectorized", i32 1} !2 = distinct !{!2, !3, !1} diff --git a/llvm/test/Transforms/LoopUnrollAndJam/dependencies.ll b/llvm/test/Transforms/LoopUnrollAndJam/dependencies.ll index 890683043b4bf..d584238745cfe 100644 --- a/llvm/test/Transforms/LoopUnrollAndJam/dependencies.ll +++ b/llvm/test/Transforms/LoopUnrollAndJam/dependencies.ll @@ -1,4 +1,5 @@ ; RUN: opt -basicaa -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 < %s -S | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='unroll-and-jam' -allow-unroll-and-jam -unroll-and-jam-count=4 < %s -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/LoopUnrollAndJam/disable.ll b/llvm/test/Transforms/LoopUnrollAndJam/disable.ll index 4a00937b9c58c..6e879896c55fc 100644 --- a/llvm/test/Transforms/LoopUnrollAndJam/disable.ll +++ b/llvm/test/Transforms/LoopUnrollAndJam/disable.ll @@ -1,4 +1,5 @@ ; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 -pass-remarks=loop-unroll-and-jam < %s -S 2>&1 | FileCheck %s +; RUN: opt -passes='unroll-and-jam' -allow-unroll-and-jam -unroll-and-jam-count=4 -pass-remarks=loop-unroll-and-jam < %s -S 2>&1 | FileCheck %s target datalayout = 
"e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/LoopUnrollAndJam/pragma-explicit.ll b/llvm/test/Transforms/LoopUnrollAndJam/pragma-explicit.ll index 5254c779d0f87..f2c12702f9e89 100644 --- a/llvm/test/Transforms/LoopUnrollAndJam/pragma-explicit.ll +++ b/llvm/test/Transforms/LoopUnrollAndJam/pragma-explicit.ll @@ -1,4 +1,5 @@ ; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-runtime -unroll-partial-threshold=60 < %s -S | FileCheck %s +; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='unroll-and-jam' -allow-unroll-and-jam -unroll-runtime -unroll-partial-threshold=60 < %s -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll index bdb47c27f1cfc..7580b50f8703b 100644 --- a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll +++ b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -basicaa -tbaa -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 -unroll-remainder < %s -S | FileCheck %s +; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='unroll-and-jam' -allow-unroll-and-jam -unroll-and-jam-count=4 -unroll-remainder < %s -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll index f005845f2db85..7195e61c280fc 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-shiftcost.ll @@ -84,4 +84,4 @@ exit: ret void } -attributes #0 = { nounwind "min-legal-vector-width"="0" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+fp-armv8d16sp,+fp16,+fpregs,+fullfp16,+hwdiv,+lob,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp" "use-soft-float"="false" } +attributes #0 = { nounwind "min-legal-vector-width"="0" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+fp-armv8d16sp,+fp16,+fullfp16,+hwdiv,+lob,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp" "use-soft-float"="false" } diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-vldn.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-vldn.ll index cb6e1005db1c2..353e725580a4a 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-vldn.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-vldn.ll @@ -84,4 +84,4 @@ while.end: ; preds = %while.body, %entry ret void } -attributes #0 = { "target-features"="+armv8.1-m.main,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fpregs,+fullfp16,+hwdiv,+lob,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-crypto,-d32,-fp-armv8,-fp-armv8sp,-neon,-vfp3,-vfp3sp,-vfp4,-vfp4sp" } +attributes #0 = { "target-features"="+armv8.1-m.main,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-crypto,-d32,-fp-armv8,-fp-armv8sp,-neon,-vfp3,-vfp3sp,-vfp4,-vfp4sp" } diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll new file mode 100644 index 0000000000000..5a3438230a2d5 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll 
@@ -0,0 +1,47 @@ +; RUN: opt < %s -loop-vectorize -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -prefer-predicate-over-epilog -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -disable-mve-tail-predication=false -S | FileCheck %s + +; Check that when we can't predicate this loop, it is still vectorised (with +; an epilogue). +; TODO: the reason this can't be predicated is that a primary induction +; variable can't be found yet for this counting-down loop. Once that is +; fixed, it should be possible to predicate this loop. + +; CHECK-LABEL: vector.body: + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1m.main-arm-unknown-eabihf" + +define dso_local void @foo(i8* noalias nocapture readonly %A, i8* noalias nocapture readonly %B, i8* noalias nocapture %C, i32 %N) #0 { +entry: + %cmp6 = icmp eq i32 %N, 0 + br i1 %cmp6, label %while.end, label %while.body.preheader + +while.body.preheader: + br label %while.body + +while.body: + %N.addr.010 = phi i32 [ %dec, %while.body ], [ %N, %while.body.preheader ] + %C.addr.09 = phi i8* [ %incdec.ptr4, %while.body ], [ %C, %while.body.preheader ] + %B.addr.08 = phi i8* [ %incdec.ptr1, %while.body ], [ %B, %while.body.preheader ] + %A.addr.07 = phi i8* [ %incdec.ptr, %while.body ], [ %A, %while.body.preheader ] + %incdec.ptr = getelementptr inbounds i8, i8* %A.addr.07, i32 1 + %0 = load i8, i8* %A.addr.07, align 1 + %incdec.ptr1 = getelementptr inbounds i8, i8* %B.addr.08, i32 1 + %1 = load i8, i8* %B.addr.08, align 1 + %add = add i8 %1, %0 + %incdec.ptr4 = getelementptr inbounds i8, i8* %C.addr.09, i32 1 + store i8 %add, i8* %C.addr.09, align 1 + %dec = add i32 %N.addr.010, -1 + %cmp = icmp eq i32 %dec, 0 + br i1 %cmp, label %while.end.loopexit, label %while.body + +while.end.loopexit: + br label %while.end + +while.end: + ret void +} + +attributes #0 = { nofree norecurse nounwind "target-features"="+armv8.1-m.main,+mve.fp" } diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll new file mode 100644 index 0000000000000..2667bfe68f616 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll @@ -0,0 +1,42 @@ +; RUN: opt < %s -loop-vectorize -prefer-predicate-over-epilog -S | FileCheck %s + +; Check that when we can't predicate this loop, it is still vectorised (with +; an epilogue). +; TODO: the reason this can't be predicated is that a primary induction +; variable can't be found yet for this counting-down loop. Once that is +; fixed, it should be possible to predicate this loop.
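+; For illustration (a sketch, not part of this test): a primary induction
+; variable of the kind the vectoriser looks for counts up from 0, e.g.
+;   %i = phi i32 [ 0, %while.body.preheader ], [ %i.next, %while.body ]
+;   %i.next = add nuw i32 %i, 1
+;   %exit.cond = icmp eq i32 %i.next, %N
+; whereas this loop only decrements %N.addr.010 from %N towards 0.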
+ +; CHECK-LABEL: vector.body: + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + +define dso_local void @foo(i8* noalias nocapture readonly %A, i8* noalias nocapture readonly %B, i8* noalias nocapture %C, i32 %N) { +entry: + %cmp6 = icmp eq i32 %N, 0 + br i1 %cmp6, label %while.end, label %while.body.preheader + +while.body.preheader: + br label %while.body + +while.body: + %N.addr.010 = phi i32 [ %dec, %while.body ], [ %N, %while.body.preheader ] + %C.addr.09 = phi i8* [ %incdec.ptr4, %while.body ], [ %C, %while.body.preheader ] + %B.addr.08 = phi i8* [ %incdec.ptr1, %while.body ], [ %B, %while.body.preheader ] + %A.addr.07 = phi i8* [ %incdec.ptr, %while.body ], [ %A, %while.body.preheader ] + %incdec.ptr = getelementptr inbounds i8, i8* %A.addr.07, i32 1 + %0 = load i8, i8* %A.addr.07, align 1 + %incdec.ptr1 = getelementptr inbounds i8, i8* %B.addr.08, i32 1 + %1 = load i8, i8* %B.addr.08, align 1 + %add = add i8 %1, %0 + %incdec.ptr4 = getelementptr inbounds i8, i8* %C.addr.09, i32 1 + store i8 %add, i8* %C.addr.09, align 1 + %dec = add i32 %N.addr.010, -1 + %cmp = icmp eq i32 %dec, 0 + br i1 %cmp, label %while.end.loopexit, label %while.body + +while.end.loopexit: + br label %while.end + +while.end: + ret void +} diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll index 246f4e42c2fa7..b72ecf5c0e4c0 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll @@ -6,59 +6,63 @@ define void @transpose_multiply(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, <9 x ; CHECK-LABEL: @transpose_multiply( ; CHECK-NEXT: entry: -; Load input matrixes %A and %B. - -; CHECK-NEXT: [[A:%.*]] = load <9 x double>, <9 x double>* [[A_PTR:%.*]] -; CHECK-NEXT: [[B:%.*]] = load <9 x double>, <9 x double>* [[B_PTR:%.*]] - -; Extract columns from loaded value %A. - -; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <9 x double> [[A]], <9 x double> undef, <3 x i32> <i32 0, i32 1, i32 2> -; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <9 x double> [[A]], <9 x double> undef, <3 x i32> <i32 3, i32 4, i32 5> -; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <9 x double> [[A]], <9 x double> undef, <3 x i32> <i32 6, i32 7, i32 8> +; Load columns of input matrices %A and %B.
+ +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <9 x double>* [[A_PTR:%.*]] to double* +; CHECK-NEXT: [[COL_CAST:%.*]] = bitcast double* [[TMP0]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST]], align 8 +; CHECK-NEXT: [[COL_GEP:%.*]] = getelementptr double, double* [[TMP0]], i32 3 +; CHECK-NEXT: [[COL_CAST1:%.*]] = bitcast double* [[COL_GEP]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST1]], align 8 +; CHECK-NEXT: [[COL_GEP3:%.*]] = getelementptr double, double* [[TMP0]], i32 6 +; CHECK-NEXT: [[COL_CAST4:%.*]] = bitcast double* [[COL_GEP3]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST4]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <9 x double>* [[B_PTR:%.*]] to double* +; CHECK-NEXT: [[COL_CAST6:%.*]] = bitcast double* [[TMP1]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST6]], align 8 +; CHECK-NEXT: [[COL_GEP8:%.*]] = getelementptr double, double* [[TMP1]], i32 3 +; CHECK-NEXT: [[COL_CAST9:%.*]] = bitcast double* [[COL_GEP8]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD10:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST9]], align 8 +; CHECK-NEXT: [[COL_GEP11:%.*]] = getelementptr double, double* [[TMP1]], i32 6 +; CHECK-NEXT: [[COL_CAST12:%.*]] = bitcast double* [[COL_GEP11]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD13:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST12]], align 8 ; Transpose %A. -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x double> [[SPLIT]], i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x double> undef, double [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x double> [[TMP1]], double [[TMP2]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 0 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x double> [[TMP3]], double [[TMP4]], i64 2 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x double> [[SPLIT]], i64 1 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 1 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x double> undef, double [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 1 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x double> [[TMP7]], double [[TMP8]], i64 1 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 1 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 1 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <3 x double> [[TMP9]], double [[TMP10]], i64 2 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <3 x double> [[SPLIT]], i64 2 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 2 ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <3 x double> undef, double [[TMP12]], i64 0 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 2 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 2 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <3 x double> [[TMP13]], double [[TMP14]], i64 1 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 2 +; 
CHECK-NEXT: [[TMP16:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 2 ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 2 -; Extract columns from %B. - -; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <9 x double> [[B]], <9 x double> undef, <3 x i32> -; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <9 x double> [[B]], <9 x double> undef, <3 x i32> -; CHECK-NEXT: [[SPLIT5:%.*]] = shufflevector <9 x double> [[B]], <9 x double> undef, <3 x i32> - ; Lower multiply(transpose(%A), %B) ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 0 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP18]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[BLOCK6:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 1 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> undef, double [[TMP20]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = fmul <1 x double> [[BLOCK6]], [[SPLAT_SPLAT8]] ; CHECK-NEXT: [[TMP22:%.*]] = fadd <1 x double> [[TMP19]], [[TMP21]] ; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 2 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> undef, double [[TMP23]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]] @@ -66,18 +70,18 @@ define void @transpose_multiply(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, <9 x ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> ; CHECK-NEXT: [[BLOCK12:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> undef, double [[TMP28]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP29:%.*]] = fmul <1 x double> [[BLOCK12]], [[SPLAT_SPLAT14]] ; CHECK-NEXT: [[BLOCK15:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP30:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 1 +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> undef, double 
[[TMP30]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP31:%.*]] = fmul <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]] ; CHECK-NEXT: [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]] ; CHECK-NEXT: [[BLOCK18:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 2 +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> undef, double [[TMP33]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP34:%.*]] = fmul <1 x double> [[BLOCK18]], [[SPLAT_SPLAT20]] @@ -85,18 +89,18 @@ define void @transpose_multiply(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, <9 x ; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <1 x double> [[TMP35]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP36]], <3 x i32> ; CHECK-NEXT: [[BLOCK21:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP38:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 0 +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> undef, double [[TMP38]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT22]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP39:%.*]] = fmul <1 x double> [[BLOCK21]], [[SPLAT_SPLAT23]] ; CHECK-NEXT: [[BLOCK24:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 1 +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> undef, double [[TMP40]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT25]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP41:%.*]] = fmul <1 x double> [[BLOCK24]], [[SPLAT_SPLAT26]] ; CHECK-NEXT: [[TMP42:%.*]] = fadd <1 x double> [[TMP39]], [[TMP41]] ; CHECK-NEXT: [[BLOCK27:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP43:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 2 +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> undef, double [[TMP43]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT28]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP44:%.*]] = fmul <1 x double> [[BLOCK27]], [[SPLAT_SPLAT29]] @@ -104,18 +108,18 @@ define void @transpose_multiply(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, <9 x ; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <1 x double> [[TMP45]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <3 x double> [[TMP37]], <3 x double> [[TMP46]], <3 x i32> ; CHECK-NEXT: [[BLOCK30:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP48:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 0 +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0 ; 
CHECK-NEXT: [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> undef, double [[TMP48]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT32:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT31]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP49:%.*]] = fmul <1 x double> [[BLOCK30]], [[SPLAT_SPLAT32]] ; CHECK-NEXT: [[BLOCK33:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 1 +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> undef, double [[TMP50]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT35:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT34]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP51:%.*]] = fmul <1 x double> [[BLOCK33]], [[SPLAT_SPLAT35]] ; CHECK-NEXT: [[TMP52:%.*]] = fadd <1 x double> [[TMP49]], [[TMP51]] ; CHECK-NEXT: [[BLOCK36:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 2 +; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> undef, double [[TMP53]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT38:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT37]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP54:%.*]] = fmul <1 x double> [[BLOCK36]], [[SPLAT_SPLAT38]] @@ -123,18 +127,18 @@ define void @transpose_multiply(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, <9 x ; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <1 x double> [[TMP55]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP56]], <3 x i32> ; CHECK-NEXT: [[BLOCK39:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP58:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 0 +; CHECK-NEXT: [[TMP58:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT40:%.*]] = insertelement <1 x double> undef, double [[TMP58]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT41:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT40]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP59:%.*]] = fmul <1 x double> [[BLOCK39]], [[SPLAT_SPLAT41]] ; CHECK-NEXT: [[BLOCK42:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP60:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 1 +; CHECK-NEXT: [[TMP60:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT43:%.*]] = insertelement <1 x double> undef, double [[TMP60]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT44:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT43]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP61:%.*]] = fmul <1 x double> [[BLOCK42]], [[SPLAT_SPLAT44]] ; CHECK-NEXT: [[TMP62:%.*]] = fadd <1 x double> [[TMP59]], [[TMP61]] ; CHECK-NEXT: [[BLOCK45:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP63:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 2 +; CHECK-NEXT: [[TMP63:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <1 x double> undef, double [[TMP63]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT47:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT46]], <1 x double> undef, 
<1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP64:%.*]] = fmul <1 x double> [[BLOCK45]], [[SPLAT_SPLAT47]] @@ -142,18 +146,18 @@ define void @transpose_multiply(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, <9 x ; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <1 x double> [[TMP65]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <3 x double> [[TMP57]], <3 x double> [[TMP66]], <3 x i32> ; CHECK-NEXT: [[BLOCK48:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP68:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 0 +; CHECK-NEXT: [[TMP68:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT49:%.*]] = insertelement <1 x double> undef, double [[TMP68]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT50:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT49]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP69:%.*]] = fmul <1 x double> [[BLOCK48]], [[SPLAT_SPLAT50]] ; CHECK-NEXT: [[BLOCK51:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP70:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 1 +; CHECK-NEXT: [[TMP70:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT52:%.*]] = insertelement <1 x double> undef, double [[TMP70]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT53:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT52]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP71:%.*]] = fmul <1 x double> [[BLOCK51]], [[SPLAT_SPLAT53]] ; CHECK-NEXT: [[TMP72:%.*]] = fadd <1 x double> [[TMP69]], [[TMP71]] ; CHECK-NEXT: [[BLOCK54:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP73:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 2 +; CHECK-NEXT: [[TMP73:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT55:%.*]] = insertelement <1 x double> undef, double [[TMP73]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT56:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT55]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP74:%.*]] = fmul <1 x double> [[BLOCK54]], [[SPLAT_SPLAT56]] @@ -161,18 +165,18 @@ define void @transpose_multiply(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, <9 x ; CHECK-NEXT: [[TMP76:%.*]] = shufflevector <1 x double> [[TMP75]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP77:%.*]] = shufflevector <3 x double> [[TMP67]], <3 x double> [[TMP76]], <3 x i32> ; CHECK-NEXT: [[BLOCK57:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP78:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 0 +; CHECK-NEXT: [[TMP78:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT58:%.*]] = insertelement <1 x double> undef, double [[TMP78]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT59:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT58]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP79:%.*]] = fmul <1 x double> [[BLOCK57]], [[SPLAT_SPLAT59]] ; CHECK-NEXT: [[BLOCK60:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP80:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 1 +; CHECK-NEXT: [[TMP80:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT61:%.*]] = insertelement <1 x double> undef, double [[TMP80]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT62:%.*]] = shufflevector <1 x double> 
[[SPLAT_SPLATINSERT61]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP81:%.*]] = fmul <1 x double> [[BLOCK60]], [[SPLAT_SPLAT62]] ; CHECK-NEXT: [[TMP82:%.*]] = fadd <1 x double> [[TMP79]], [[TMP81]] ; CHECK-NEXT: [[BLOCK63:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP83:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 2 +; CHECK-NEXT: [[TMP83:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT64:%.*]] = insertelement <1 x double> undef, double [[TMP83]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT65:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT64]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP84:%.*]] = fmul <1 x double> [[BLOCK63]], [[SPLAT_SPLAT65]] @@ -180,18 +184,18 @@ define void @transpose_multiply(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, <9 x ; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <1 x double> [[TMP85]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP87:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP86]], <3 x i32> ; CHECK-NEXT: [[BLOCK66:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP88:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 0 +; CHECK-NEXT: [[TMP88:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT67:%.*]] = insertelement <1 x double> undef, double [[TMP88]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT68:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT67]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP89:%.*]] = fmul <1 x double> [[BLOCK66]], [[SPLAT_SPLAT68]] ; CHECK-NEXT: [[BLOCK69:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP90:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 1 +; CHECK-NEXT: [[TMP90:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT70:%.*]] = insertelement <1 x double> undef, double [[TMP90]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT71:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT70]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP91:%.*]] = fmul <1 x double> [[BLOCK69]], [[SPLAT_SPLAT71]] ; CHECK-NEXT: [[TMP92:%.*]] = fadd <1 x double> [[TMP89]], [[TMP91]] ; CHECK-NEXT: [[BLOCK72:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP93:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 2 +; CHECK-NEXT: [[TMP93:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT73:%.*]] = insertelement <1 x double> undef, double [[TMP93]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT74:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT73]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP94:%.*]] = fmul <1 x double> [[BLOCK72]], [[SPLAT_SPLAT74]] @@ -199,18 +203,18 @@ define void @transpose_multiply(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, <9 x ; CHECK-NEXT: [[TMP96:%.*]] = shufflevector <1 x double> [[TMP95]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP97:%.*]] = shufflevector <3 x double> [[TMP87]], <3 x double> [[TMP96]], <3 x i32> ; CHECK-NEXT: [[BLOCK75:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP98:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 0 +; CHECK-NEXT: [[TMP98:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT76:%.*]] = insertelement <1 x double> undef, double 
[[TMP98]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT77:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT76]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP99:%.*]] = fmul <1 x double> [[BLOCK75]], [[SPLAT_SPLAT77]] ; CHECK-NEXT: [[BLOCK78:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP100:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 1 +; CHECK-NEXT: [[TMP100:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT79:%.*]] = insertelement <1 x double> undef, double [[TMP100]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT80:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT79]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP101:%.*]] = fmul <1 x double> [[BLOCK78]], [[SPLAT_SPLAT80]] ; CHECK-NEXT: [[TMP102:%.*]] = fadd <1 x double> [[TMP99]], [[TMP101]] ; CHECK-NEXT: [[BLOCK81:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP103:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 2 +; CHECK-NEXT: [[TMP103:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT82:%.*]] = insertelement <1 x double> undef, double [[TMP103]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT83:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT82]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP104:%.*]] = fmul <1 x double> [[BLOCK81]], [[SPLAT_SPLAT83]] @@ -248,59 +252,61 @@ define void @transpose_multiply_add(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, ; CHECK-LABEL: @transpose_multiply_add( ; CHECK-NEXT: entry: -; Load input matrixes %A and %B. - -; CHECK-NEXT: [[A:%.*]] = load <9 x double>, <9 x double>* [[A_PTR:%.*]] -; CHECK-NEXT: [[B:%.*]] = load <9 x double>, <9 x double>* [[B_PTR:%.*]] - -; Extract columns from loaded value %A. 
- -; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <9 x double> [[A]], <9 x double> undef, <3 x i32> -; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <9 x double> [[A]], <9 x double> undef, <3 x i32> -; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <9 x double> [[A]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <9 x double>* [[A_PTR:%.*]] to double* +; CHECK-NEXT: [[COL_CAST:%.*]] = bitcast double* [[TMP0]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST]], align 8 +; CHECK-NEXT: [[COL_GEP:%.*]] = getelementptr double, double* [[TMP0]], i32 3 +; CHECK-NEXT: [[COL_CAST1:%.*]] = bitcast double* [[COL_GEP]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST1]], align 8 +; CHECK-NEXT: [[COL_GEP3:%.*]] = getelementptr double, double* [[TMP0]], i32 6 +; CHECK-NEXT: [[COL_CAST4:%.*]] = bitcast double* [[COL_GEP3]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST4]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <9 x double>* [[B_PTR:%.*]] to double* +; CHECK-NEXT: [[COL_CAST6:%.*]] = bitcast double* [[TMP1]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST6]], align 8 +; CHECK-NEXT: [[COL_GEP8:%.*]] = getelementptr double, double* [[TMP1]], i32 3 +; CHECK-NEXT: [[COL_CAST9:%.*]] = bitcast double* [[COL_GEP8]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD10:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST9]], align 8 +; CHECK-NEXT: [[COL_GEP11:%.*]] = getelementptr double, double* [[TMP1]], i32 6 +; CHECK-NEXT: [[COL_CAST12:%.*]] = bitcast double* [[COL_GEP11]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD13:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST12]], align 8 ; Transpose %A. 
-; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x double> [[SPLIT]], i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x double> undef, double [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x double> [[TMP1]], double [[TMP2]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 0 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x double> [[TMP3]], double [[TMP4]], i64 2 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x double> [[SPLIT]], i64 1 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 1 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x double> undef, double [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 1 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x double> [[TMP7]], double [[TMP8]], i64 1 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 1 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 1 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <3 x double> [[TMP9]], double [[TMP10]], i64 2 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <3 x double> [[SPLIT]], i64 2 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 2 ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <3 x double> undef, double [[TMP12]], i64 0 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 2 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 2 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <3 x double> [[TMP13]], double [[TMP14]], i64 1 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 2 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 2 ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 2 -; Extract columns from %B. 
- -; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <9 x double> [[B]], <9 x double> undef, <3 x i32> -; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <9 x double> [[B]], <9 x double> undef, <3 x i32> -; CHECK-NEXT: [[SPLIT5:%.*]] = shufflevector <9 x double> [[B]], <9 x double> undef, <3 x i32> - ; Lower multiply(transpose(%A), %B) ; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 0 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> undef, double [[TMP18]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP19:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] ; CHECK-NEXT: [[BLOCK6:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 1 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> undef, double [[TMP20]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = fmul <1 x double> [[BLOCK6]], [[SPLAT_SPLAT8]] ; CHECK-NEXT: [[TMP22:%.*]] = fadd <1 x double> [[TMP19]], [[TMP21]] ; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 2 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> undef, double [[TMP23]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]] @@ -308,18 +314,18 @@ define void @transpose_multiply_add(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> ; CHECK-NEXT: [[BLOCK12:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> undef, double [[TMP28]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP29:%.*]] = fmul <1 x double> [[BLOCK12]], [[SPLAT_SPLAT14]] ; CHECK-NEXT: [[BLOCK15:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP30:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 1 +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> undef, double [[TMP30]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP31:%.*]] = fmul <1 x 
double> [[BLOCK15]], [[SPLAT_SPLAT17]] ; CHECK-NEXT: [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]] ; CHECK-NEXT: [[BLOCK18:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 2 +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> undef, double [[TMP33]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP34:%.*]] = fmul <1 x double> [[BLOCK18]], [[SPLAT_SPLAT20]] @@ -327,18 +333,18 @@ define void @transpose_multiply_add(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, ; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <1 x double> [[TMP35]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP36]], <3 x i32> ; CHECK-NEXT: [[BLOCK21:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP38:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 0 +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> undef, double [[TMP38]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT22]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP39:%.*]] = fmul <1 x double> [[BLOCK21]], [[SPLAT_SPLAT23]] ; CHECK-NEXT: [[BLOCK24:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 1 +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> undef, double [[TMP40]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT25]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP41:%.*]] = fmul <1 x double> [[BLOCK24]], [[SPLAT_SPLAT26]] ; CHECK-NEXT: [[TMP42:%.*]] = fadd <1 x double> [[TMP39]], [[TMP41]] ; CHECK-NEXT: [[BLOCK27:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP43:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 2 +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> undef, double [[TMP43]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT28]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP44:%.*]] = fmul <1 x double> [[BLOCK27]], [[SPLAT_SPLAT29]] @@ -346,18 +352,18 @@ define void @transpose_multiply_add(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, ; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <1 x double> [[TMP45]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <3 x double> [[TMP37]], <3 x double> [[TMP46]], <3 x i32> ; CHECK-NEXT: [[BLOCK30:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP48:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 0 +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> undef, double [[TMP48]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT32:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT31]], <1 x double> 
undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP49:%.*]] = fmul <1 x double> [[BLOCK30]], [[SPLAT_SPLAT32]] ; CHECK-NEXT: [[BLOCK33:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 1 +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> undef, double [[TMP50]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT35:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT34]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP51:%.*]] = fmul <1 x double> [[BLOCK33]], [[SPLAT_SPLAT35]] ; CHECK-NEXT: [[TMP52:%.*]] = fadd <1 x double> [[TMP49]], [[TMP51]] ; CHECK-NEXT: [[BLOCK36:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 2 +; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> undef, double [[TMP53]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT38:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT37]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP54:%.*]] = fmul <1 x double> [[BLOCK36]], [[SPLAT_SPLAT38]] @@ -365,18 +371,18 @@ define void @transpose_multiply_add(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, ; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <1 x double> [[TMP55]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP56]], <3 x i32> ; CHECK-NEXT: [[BLOCK39:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP58:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 0 +; CHECK-NEXT: [[TMP58:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT40:%.*]] = insertelement <1 x double> undef, double [[TMP58]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT41:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT40]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP59:%.*]] = fmul <1 x double> [[BLOCK39]], [[SPLAT_SPLAT41]] ; CHECK-NEXT: [[BLOCK42:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP60:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 1 +; CHECK-NEXT: [[TMP60:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT43:%.*]] = insertelement <1 x double> undef, double [[TMP60]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT44:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT43]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP61:%.*]] = fmul <1 x double> [[BLOCK42]], [[SPLAT_SPLAT44]] ; CHECK-NEXT: [[TMP62:%.*]] = fadd <1 x double> [[TMP59]], [[TMP61]] ; CHECK-NEXT: [[BLOCK45:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP63:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 2 +; CHECK-NEXT: [[TMP63:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <1 x double> undef, double [[TMP63]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT47:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT46]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP64:%.*]] = fmul <1 x double> [[BLOCK45]], [[SPLAT_SPLAT47]] @@ -384,18 +390,18 @@ define void @transpose_multiply_add(<9 x double>* %A.Ptr, <9 x double>* 
%B.Ptr, ; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <1 x double> [[TMP65]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <3 x double> [[TMP57]], <3 x double> [[TMP66]], <3 x i32> ; CHECK-NEXT: [[BLOCK48:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP68:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 0 +; CHECK-NEXT: [[TMP68:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT49:%.*]] = insertelement <1 x double> undef, double [[TMP68]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT50:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT49]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP69:%.*]] = fmul <1 x double> [[BLOCK48]], [[SPLAT_SPLAT50]] ; CHECK-NEXT: [[BLOCK51:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP70:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 1 +; CHECK-NEXT: [[TMP70:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT52:%.*]] = insertelement <1 x double> undef, double [[TMP70]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT53:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT52]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP71:%.*]] = fmul <1 x double> [[BLOCK51]], [[SPLAT_SPLAT53]] ; CHECK-NEXT: [[TMP72:%.*]] = fadd <1 x double> [[TMP69]], [[TMP71]] ; CHECK-NEXT: [[BLOCK54:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP73:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 2 +; CHECK-NEXT: [[TMP73:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT55:%.*]] = insertelement <1 x double> undef, double [[TMP73]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT56:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT55]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP74:%.*]] = fmul <1 x double> [[BLOCK54]], [[SPLAT_SPLAT56]] @@ -403,18 +409,18 @@ define void @transpose_multiply_add(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, ; CHECK-NEXT: [[TMP76:%.*]] = shufflevector <1 x double> [[TMP75]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP77:%.*]] = shufflevector <3 x double> [[TMP67]], <3 x double> [[TMP76]], <3 x i32> ; CHECK-NEXT: [[BLOCK57:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP78:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 0 +; CHECK-NEXT: [[TMP78:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT58:%.*]] = insertelement <1 x double> undef, double [[TMP78]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT59:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT58]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP79:%.*]] = fmul <1 x double> [[BLOCK57]], [[SPLAT_SPLAT59]] ; CHECK-NEXT: [[BLOCK60:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP80:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 1 +; CHECK-NEXT: [[TMP80:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT61:%.*]] = insertelement <1 x double> undef, double [[TMP80]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT62:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT61]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP81:%.*]] = fmul <1 x double> [[BLOCK60]], [[SPLAT_SPLAT62]] ; CHECK-NEXT: [[TMP82:%.*]] = fadd <1 x double> [[TMP79]], [[TMP81]] ; 
CHECK-NEXT: [[BLOCK63:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP83:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 2 +; CHECK-NEXT: [[TMP83:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT64:%.*]] = insertelement <1 x double> undef, double [[TMP83]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT65:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT64]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP84:%.*]] = fmul <1 x double> [[BLOCK63]], [[SPLAT_SPLAT65]] @@ -422,18 +428,18 @@ define void @transpose_multiply_add(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, ; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <1 x double> [[TMP85]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP87:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP86]], <3 x i32> ; CHECK-NEXT: [[BLOCK66:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP88:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 0 +; CHECK-NEXT: [[TMP88:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT67:%.*]] = insertelement <1 x double> undef, double [[TMP88]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT68:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT67]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP89:%.*]] = fmul <1 x double> [[BLOCK66]], [[SPLAT_SPLAT68]] ; CHECK-NEXT: [[BLOCK69:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP90:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 1 +; CHECK-NEXT: [[TMP90:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT70:%.*]] = insertelement <1 x double> undef, double [[TMP90]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT71:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT70]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP91:%.*]] = fmul <1 x double> [[BLOCK69]], [[SPLAT_SPLAT71]] ; CHECK-NEXT: [[TMP92:%.*]] = fadd <1 x double> [[TMP89]], [[TMP91]] ; CHECK-NEXT: [[BLOCK72:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP93:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 2 +; CHECK-NEXT: [[TMP93:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT73:%.*]] = insertelement <1 x double> undef, double [[TMP93]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT74:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT73]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP94:%.*]] = fmul <1 x double> [[BLOCK72]], [[SPLAT_SPLAT74]] @@ -441,18 +447,18 @@ define void @transpose_multiply_add(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, ; CHECK-NEXT: [[TMP96:%.*]] = shufflevector <1 x double> [[TMP95]], <1 x double> undef, <3 x i32> ; CHECK-NEXT: [[TMP97:%.*]] = shufflevector <3 x double> [[TMP87]], <3 x double> [[TMP96]], <3 x i32> ; CHECK-NEXT: [[BLOCK75:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP98:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 0 +; CHECK-NEXT: [[TMP98:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT76:%.*]] = insertelement <1 x double> undef, double [[TMP98]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT77:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT76]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP99:%.*]] = fmul <1 x double> [[BLOCK75]], 
[[SPLAT_SPLAT77]] ; CHECK-NEXT: [[BLOCK78:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP100:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 1 +; CHECK-NEXT: [[TMP100:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1 ; CHECK-NEXT: [[SPLAT_SPLATINSERT79:%.*]] = insertelement <1 x double> undef, double [[TMP100]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT80:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT79]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP101:%.*]] = fmul <1 x double> [[BLOCK78]], [[SPLAT_SPLAT80]] ; CHECK-NEXT: [[TMP102:%.*]] = fadd <1 x double> [[TMP99]], [[TMP101]] ; CHECK-NEXT: [[BLOCK81:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <1 x i32> -; CHECK-NEXT: [[TMP103:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 2 +; CHECK-NEXT: [[TMP103:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2 ; CHECK-NEXT: [[SPLAT_SPLATINSERT82:%.*]] = insertelement <1 x double> undef, double [[TMP103]], i32 0 ; CHECK-NEXT: [[SPLAT_SPLAT83:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT82]], <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP104:%.*]] = fmul <1 x double> [[BLOCK81]], [[SPLAT_SPLAT83]] @@ -465,19 +471,21 @@ define void @transpose_multiply_add(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, ; Load %C. -; CHECK-NEXT: [[C:%.*]] = load <9 x double>, <9 x double>* [[C_PTR:%.*]] - -; Extract columns from %C. - -; CHECK-NEXT: [[SPLIT84:%.*]] = shufflevector <9 x double> [[C]], <9 x double> undef, <3 x i32> -; CHECK-NEXT: [[SPLIT85:%.*]] = shufflevector <9 x double> [[C]], <9 x double> undef, <3 x i32> -; CHECK-NEXT: [[SPLIT86:%.*]] = shufflevector <9 x double> [[C]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP110:%.*]] = bitcast <9 x double>* [[C_PTR:%.*]] to double* +; CHECK-NEXT: [[COL_CAST92:%.*]] = bitcast double* [[TMP110]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD93:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST92]], align 8 +; CHECK-NEXT: [[COL_GEP94:%.*]] = getelementptr double, double* [[TMP110]], i32 3 +; CHECK-NEXT: [[COL_CAST95:%.*]] = bitcast double* [[COL_GEP94]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD96:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST95]], align 8 +; CHECK-NEXT: [[COL_GEP97:%.*]] = getelementptr double, double* [[TMP110]], i32 6 +; CHECK-NEXT: [[COL_CAST98:%.*]] = bitcast double* [[COL_GEP97]] to <3 x double>* +; CHECK-NEXT: [[COL_LOAD99:%.*]] = load <3 x double>, <3 x double>* [[COL_CAST98]], align 8 ; Add column vectors. -; CHECK-NEXT: [[TMP108:%.*]] = fadd <3 x double> [[SPLIT84]], [[TMP47]] -; CHECK-NEXT: [[TMP109:%.*]] = fadd <3 x double> [[SPLIT85]], [[TMP77]] -; CHECK-NEXT: [[TMP110:%.*]] = fadd <3 x double> [[SPLIT86]], [[TMP107]] +; CHECK-NEXT: [[TMP108:%.*]] = fadd <3 x double> [[COL_LOAD93]], [[TMP47]] +; CHECK-NEXT: [[TMP109:%.*]] = fadd <3 x double> [[COL_LOAD96]], [[TMP77]] +; CHECK-NEXT: [[TMP110:%.*]] = fadd <3 x double> [[COL_LOAD99]], [[TMP107]] ; Store result columns. 
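The new CHECK lines in this hunk reflect the core change to the matrix lowering: each column is now read with its own narrow load (a bitcast of the matrix pointer to double*, a getelementptr to the start of the column, and a <3 x double> load) instead of one wide <9 x double> load that is then split into columns with shufflevectors. Below is a minimal hand-written sketch of that per-column access pattern, assuming a column-major 3 x 3 matrix of doubles; the function name and parameters are illustrative and not taken from the pass output:

define <3 x double> @load_column_3x3(<9 x double>* %M, i32 %col) {
entry:
  ; View the 3x3 matrix as a flat array of 9 doubles.
  %base = bitcast <9 x double>* %M to double*
  ; Column %col starts at element %col * 3 (the stride is the number of rows).
  %offset = mul i32 %col, 3
  %col.start = getelementptr double, double* %base, i32 %offset
  ; A single narrow vector load replaces the wide load plus shufflevector split.
  %col.ptr = bitcast double* %col.start to <3 x double>*
  %column = load <3 x double>, <3 x double>* %col.ptr, align 8
  ret <3 x double> %column
}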
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backward.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backward.ll new file mode 100644 index 0000000000000..89ca79649b879 --- /dev/null +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backward.ll @@ -0,0 +1,96 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -lower-matrix-intrinsics -S < %s | FileCheck %s +; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s + +define <8 x double> @fadd_transpose(<8 x double> %a, <8 x double> %b) { +; CHECK-LABEL: @fadd_transpose( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> undef, <2 x i32> <i32 0, i32 1> +; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <2 x i32> <i32 2, i32 3> +; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <2 x i32> <i32 4, i32 5> +; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <2 x i32> <i32 6, i32 7> +; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <8 x double> [[B:%.*]], <8 x double> undef, <2 x i32> <i32 0, i32 1> +; CHECK-NEXT: [[SPLIT5:%.*]] = shufflevector <8 x double> [[B]], <8 x double> undef, <2 x i32> <i32 2, i32 3> +; CHECK-NEXT: [[SPLIT6:%.*]] = shufflevector <8 x double> [[B]], <8 x double> undef, <2 x i32> <i32 4, i32 5> +; CHECK-NEXT: [[SPLIT7:%.*]] = shufflevector <8 x double> [[B]], <8 x double> undef, <2 x i32> <i32 6, i32 7> +; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[SPLIT]], [[SPLIT4]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[SPLIT1]], [[SPLIT5]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[SPLIT2]], [[SPLIT6]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[SPLIT3]], [[SPLIT7]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP0]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> undef, double [[TMP4]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x double> [[TMP5]], double [[TMP6]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x double> [[TMP7]], double [[TMP8]], i64 2 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x double> [[TMP9]], double [[TMP10]], i64 3 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP0]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x double> undef, double [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x double> [[TMP13]], double [[TMP14]], i64 1 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x double> [[TMP15]], double [[TMP16]], i64 2 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x double> [[TMP17]], double [[TMP18]], i64 3 +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: ret <8 x double> [[TMP20]] +; +entry: + %add = fadd <8 x double> %a, %b + %c = call <8 x double> @llvm.matrix.transpose(<8 x double> %add, i32 2, i32 4) + ret <8 x double> %c +} + +define <8 x double> @load_fadd_transpose(<8 x double>* %A.Ptr, <8 x double> %b) { +; CHECK-LABEL: @load_fadd_transpose( +; CHECK-NEXT: entry: + +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x double>* [[A_PTR:%.*]] to double* +; CHECK-NEXT:
[[COL_CAST:%.*]] = bitcast double* [[TMP0]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST]], align 8 +; CHECK-NEXT: [[COL_GEP:%.*]] = getelementptr double, double* [[TMP0]], i32 2 +; CHECK-NEXT: [[COL_CAST1:%.*]] = bitcast double* [[COL_GEP]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST1]], align 8 +; CHECK-NEXT: [[COL_GEP3:%.*]] = getelementptr double, double* [[TMP0]], i32 4 +; CHECK-NEXT: [[COL_CAST4:%.*]] = bitcast double* [[COL_GEP3]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST4]], align 8 +; CHECK-NEXT: [[COL_GEP6:%.*]] = getelementptr double, double* [[TMP0]], i32 6 +; CHECK-NEXT: [[COL_CAST7:%.*]] = bitcast double* [[COL_GEP6]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <2 x double>, <2 x double>* [[COL_CAST7]], align 8 +; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <8 x double> [[B:%.*]], <8 x double> undef, <2 x i32> +; CHECK-NEXT: [[SPLIT5:%.*]] = shufflevector <8 x double> [[B]], <8 x double> undef, <2 x i32> +; CHECK-NEXT: [[SPLIT6:%.*]] = shufflevector <8 x double> [[B]], <8 x double> undef, <2 x i32> +; CHECK-NEXT: [[SPLIT7:%.*]] = shufflevector <8 x double> [[B]], <8 x double> undef, <2 x i32> + +; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[COL_LOAD]], [[SPLIT4]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[COL_LOAD2]], [[SPLIT5]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[COL_LOAD5]], [[SPLIT6]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[COL_LOAD8]], [[SPLIT7]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP0]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> undef, double [[TMP4]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x double> [[TMP5]], double [[TMP6]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x double> [[TMP7]], double [[TMP8]], i64 2 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x double> [[TMP9]], double [[TMP10]], i64 3 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP0]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x double> undef, double [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x double> [[TMP13]], double [[TMP14]], i64 1 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x double> [[TMP15]], double [[TMP16]], i64 2 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x double> [[TMP17]], double [[TMP18]], i64 3 +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> [[TMP19]], <8 x i32> +; CHECK-NEXT: ret <8 x double> [[TMP20]] +; +entry: + %a = load <8 x double>, <8 x double>* %A.Ptr + %add = fadd <8 x double> %a, %b + %c = call <8 x double> @llvm.matrix.transpose(<8 x double> %add, i32 2, i32 4) + ret <8 x double> %c +} + +declare <8 x double> @llvm.matrix.transpose(<8 x double>, i32, i32) diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll new file mode 100644 index 0000000000000..591cddd261cf6 
--- /dev/null +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll @@ -0,0 +1,135 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -lower-matrix-intrinsics -S < %s | FileCheck %s +; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s + +; Check that we use flattened vectors for PHI operands and extract the columns afterwards. +define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B, <9 x double> %C) { +; CHECK-LABEL: @unsupported_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] + +; CHECK-LABEL: if.then: +; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <9 x double> [[A:%.*]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <9 x double> [[A]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[SPLIT5:%.*]] = shufflevector <9 x double> [[A]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x double> undef, double [[TMP0]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x double> [[TMP1]], double [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x double> [[TMP3]], double [[TMP4]], i64 2 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x double> undef, double [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x double> [[TMP7]], double [[TMP8]], i64 1 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 1 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <3 x double> [[TMP9]], double [[TMP10]], i64 2 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 2 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <3 x double> undef, double [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <3 x double> [[TMP13]], double [[TMP14]], i64 1 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 2 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 2 +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> [[TMP11]], <6 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <6 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <6 x double> [[TMP18]], <6 x double> [[TMP19]], <9 x i32> +; CHECK-NEXT: br label [[IF_END:%.*]] + +; CHECK-LABEL: if.else: +; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <9 x double> [[B:%.*]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <9 x double> [[B]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <9 x double> [[B]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <3 x double> [[SPLIT]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <3 x double> undef, double [[TMP21]], i64 0 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 0 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <3 x double> [[TMP22]], double [[TMP23]], i64 1 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 0 +; 
CHECK-NEXT: [[TMP26:%.*]] = insertelement <3 x double> [[TMP24]], double [[TMP25]], i64 2 +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <3 x double> [[SPLIT]], i64 1 +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <3 x double> undef, double [[TMP27]], i64 0 +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 1 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <3 x double> [[TMP28]], double [[TMP29]], i64 1 +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 1 +; CHECK-NEXT: [[TMP32:%.*]] = insertelement <3 x double> [[TMP30]], double [[TMP31]], i64 2 +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <3 x double> [[SPLIT]], i64 2 +; CHECK-NEXT: [[TMP34:%.*]] = insertelement <3 x double> undef, double [[TMP33]], i64 0 +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 2 +; CHECK-NEXT: [[TMP36:%.*]] = insertelement <3 x double> [[TMP34]], double [[TMP35]], i64 1 +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 2 +; CHECK-NEXT: [[TMP38:%.*]] = insertelement <3 x double> [[TMP36]], double [[TMP37]], i64 2 +; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <3 x double> [[TMP26]], <3 x double> [[TMP32]], <6 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <3 x double> [[TMP38]], <3 x double> undef, <6 x i32> +; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <6 x double> [[TMP39]], <6 x double> [[TMP40]], <9 x i32> +; CHECK-NEXT: br label [[IF_END]] + +; CHECK-LABEL: if.end: +; CHECK-NEXT: [[MERGE:%.*]] = phi <9 x double> [ [[TMP20]], [[IF_THEN]] ], [ [[TMP41]], [[IF_ELSE]] ] +; CHECK-NEXT: [[SPLIT6:%.*]] = shufflevector <9 x double> [[C:%.*]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[SPLIT7:%.*]] = shufflevector <9 x double> [[C]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[SPLIT8:%.*]] = shufflevector <9 x double> [[C]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[SPLIT9:%.*]] = shufflevector <9 x double> [[MERGE]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[SPLIT10:%.*]] = shufflevector <9 x double> [[MERGE]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[SPLIT11:%.*]] = shufflevector <9 x double> [[MERGE]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> undef, <1 x i32> zeroinitializer +; +entry: + br i1 %cond, label %if.then, label %if.else + +if.then: ; preds = %entry + %A.trans = tail call <9 x double> @llvm.matrix.transpose.v9f64(<9 x double> %A, i32 3, i32 3) + br label %if.end + +if.else: ; preds = %entry + %B.trans = tail call <9 x double> @llvm.matrix.transpose.v9f64(<9 x double> %B, i32 3, i32 3) + br label %if.end + +if.end: ; preds = %if.then, %if.else + %merge = phi <9 x double> [ %A.trans, %if.then ], [ %B.trans, %if.else ] + %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v9f64.v9f64(<9 x double> %C, <9 x double> %merge, i32 3, i32 3, i32 3) + ret <9 x double> %res +} + +; Make sure we use a flattened vector when calling @foo and then use its flat vector result properly. 
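+; (A call to an arbitrary function such as @foo is opaque to the shape +; propagation: the lowering must materialize the flat <9 x double> operand +; right before the call and re-split the flat result into columns afterwards, +; as the CHECK lines below verify.)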
+define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B) { +; CHECK-LABEL: @unsupported_call( +; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <9 x double> [[A:%.*]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <9 x double> [[A]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <9 x double> [[A]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x double> [[SPLIT]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <3 x double> undef, double [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <3 x double> [[TMP2]], double [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <3 x double> [[TMP4]], double [[TMP5]], i64 2 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <3 x double> [[SPLIT]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <3 x double> undef, double [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 1 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <3 x double> [[TMP8]], double [[TMP9]], i64 1 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 1 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <3 x double> [[TMP10]], double [[TMP11]], i64 2 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <3 x double> [[SPLIT]], i64 2 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <3 x double> undef, double [[TMP13]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 2 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <3 x double> [[TMP14]], double [[TMP15]], i64 1 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 2 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <3 x double> [[TMP16]], double [[TMP17]], i64 2 +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <3 x double> [[TMP6]], <3 x double> [[TMP12]], <6 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <3 x double> [[TMP18]], <3 x double> undef, <6 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <6 x double> [[TMP19]], <6 x double> [[TMP20]], <9 x i32> +; CHECK-NEXT: [[A_FOO:%.*]] = call <9 x double> @foo(<9 x double> [[TMP21]]) +; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <9 x double> [[B:%.*]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <9 x double> [[B]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[SPLIT5:%.*]] = shufflevector <9 x double> [[B]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[SPLIT6:%.*]] = shufflevector <9 x double> [[A_FOO]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[SPLIT7:%.*]] = shufflevector <9 x double> [[A_FOO]], <9 x double> undef, <3 x i32> +; CHECK-NEXT: [[SPLIT8:%.*]] = shufflevector <9 x double> [[A_FOO]], <9 x double> undef, <3 x i32> +; + %A.trans = tail call <9 x double> @llvm.matrix.transpose.v9f64(<9 x double> %A, i32 3, i32 3) + %A.foo = call <9 x double> @foo(<9 x double> %A.trans) + %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v9f64.v9f64(<9 x double> %B, <9 x double> %A.foo, i32 3, i32 3, i32 3) + ret <9 x double> %res +} + +declare <9 x double> @llvm.matrix.multiply.v9f64.v9f64.v9f64(<9 x double>, <9 x double>, i32 immarg, i32 immarg, i32 immarg) +declare <9 x double> @llvm.matrix.transpose.v9f64(<9 x double>, i32 immarg, i32 immarg) +declare <9 x double> @foo(<9 x double>) diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-multiple-iterations.ll 
b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-multiple-iterations.ll new file mode 100644 index 0000000000000..38200b3883dc0 --- /dev/null +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-multiple-iterations.ll @@ -0,0 +1,84 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -lower-matrix-intrinsics -S < %s | FileCheck %s +; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s + + +; Make sure we propagate in multiple iterations. First, we back-propagate the +; shape information from the transpose to %A, in the next iteration we +; forward-propagate it to %Mul, and then back to %B. +define <16 x double> @backpropagation_iterations(<16 x double>* %A.Ptr, <16 x double>* %B.Ptr) { +; CHECK-LABEL: @backpropagation_iterations( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x double>* [[A_PTR:%.*]] to double* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to <4 x double>* +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[TMP2]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr double, double* [[TMP1]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[TMP5]] to <4 x double>* +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x double>, <4 x double>* [[TMP6]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr double, double* [[TMP1]], i32 8 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast double* [[TMP9]] to <4 x double>* +; CHECK-NEXT: [[TMP11:%.*]] = load <4 x double>, <4 x double>* [[TMP10]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP1]], i32 12 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to <4 x double>* +; CHECK-NEXT: [[TMP15:%.*]] = load <4 x double>, <4 x double>* [[TMP14]], align 8 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x double> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x double> undef, double [[TMP16]], i64 0 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x double> [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x double> [[TMP17]], double [[TMP18]], i64 1 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x double> [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x double> [[TMP19]], double [[TMP20]], i64 2 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x double> [[TMP15]], i64 0 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x double> [[TMP21]], double [[TMP22]], i64 3 +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x double> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x double> undef, double [[TMP24]], i64 0 +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x double> [[TMP7]], i64 1 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x double> [[TMP25]], double [[TMP26]], i64 1 +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x double> [[TMP11]], i64 1 +; CHECK-NEXT: [[TMP29:%.*]] = insertelement <4 x double> [[TMP27]], double [[TMP28]], i64 2 +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x double> [[TMP15]], i64 1 +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x double> [[TMP29]], double [[TMP30]], i64 3 +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x double> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x double> undef, double [[TMP32]], i64 0 +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x double> [[TMP7]], i64 2 +; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x double> [[TMP33]], double [[TMP34]], i64 1 +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x double> [[TMP11]], i64 2 +; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x double> [[TMP35]], double 
[[TMP36]], i64 2 +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x double> [[TMP15]], i64 2 +; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x double> [[TMP37]], double [[TMP38]], i64 3 +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <4 x double> [[TMP3]], i64 3 +; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x double> undef, double [[TMP40]], i64 0 +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x double> [[TMP7]], i64 3 +; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x double> [[TMP41]], double [[TMP42]], i64 1 +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x double> [[TMP11]], i64 3 +; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x double> [[TMP43]], double [[TMP44]], i64 2 +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <4 x double> [[TMP15]], i64 3 +; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x double> [[TMP45]], double [[TMP46]], i64 3 +; CHECK-NEXT: [[TMP48:%.*]] = bitcast <16 x double>* [[B_PTR:%.*]] to double* +; CHECK-NEXT: [[TMP49:%.*]] = bitcast double* [[TMP48]] to <4 x double>* +; CHECK-NEXT: [[TMP50:%.*]] = load <4 x double>, <4 x double>* [[TMP49]], align 8 +; CHECK-NEXT: [[TMP52:%.*]] = getelementptr double, double* [[TMP48]], i32 4 +; CHECK-NEXT: [[TMP53:%.*]] = bitcast double* [[TMP52]] to <4 x double>* +; CHECK-NEXT: [[TMP54:%.*]] = load <4 x double>, <4 x double>* [[TMP53]], align 8 +; CHECK-NEXT: [[TMP56:%.*]] = getelementptr double, double* [[TMP48]], i32 8 +; CHECK-NEXT: [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>* +; CHECK-NEXT: [[TMP58:%.*]] = load <4 x double>, <4 x double>* [[TMP57]], align 8 +; CHECK-NEXT: [[TMP60:%.*]] = getelementptr double, double* [[TMP48]], i32 12 +; CHECK-NEXT: [[TMP61:%.*]] = bitcast double* [[TMP60]] to <4 x double>* +; CHECK-NEXT: [[TMP62:%.*]] = load <4 x double>, <4 x double>* [[TMP61]], align 8 +; CHECK-NEXT: [[TMP63:%.*]] = fmul <4 x double> [[TMP3]], [[TMP50]] +; CHECK-NEXT: [[TMP64:%.*]] = fmul <4 x double> [[TMP7]], [[TMP54]] +; CHECK-NEXT: [[TMP65:%.*]] = fmul <4 x double> [[TMP11]], [[TMP58]] +; CHECK-NEXT: [[TMP66:%.*]] = fmul <4 x double> [[TMP15]], [[TMP62]] +; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <4 x double> [[TMP63]], <4 x double> [[TMP64]], <8 x i32> +; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <4 x double> [[TMP65]], <4 x double> [[TMP66]], <8 x i32> +; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <8 x double> [[TMP67]], <8 x double> [[TMP68]], <16 x i32> +; CHECK-NEXT: ret <16 x double> [[TMP69]] +; + %A = load <16 x double>, <16 x double>* %A.Ptr + %A.trans = tail call <16 x double> @llvm.matrix.transpose.v16f64(<16 x double> %A, i32 4, i32 4) + %B = load <16 x double>, <16 x double>* %B.Ptr + %Mul = fmul <16 x double> %A, %B + ret <16 x double> %Mul +} + +declare <16 x double> @llvm.matrix.multiply.v16f64.v16f64.v16f64(<16 x double>, <16 x double>, i32 immarg, i32 immarg, i32 immarg) +declare <16 x double> @llvm.matrix.transpose.v16f64(<16 x double>, i32 immarg, i32 immarg) diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp index adbf1b3b8c608..65b831c96e8f6 100644 --- a/llvm/unittests/ADT/APFloatTest.cpp +++ b/llvm/unittests/ADT/APFloatTest.cpp @@ -22,15 +22,18 @@ using namespace llvm; static std::string convertToErrorFromString(StringRef Str) { llvm::APFloat F(0.0); - auto ErrOrStatus = + auto StatusOrErr = F.convertFromString(Str, llvm::APFloat::rmNearestTiesToEven); - EXPECT_TRUE(!ErrOrStatus); - return toString(ErrOrStatus.takeError()); + EXPECT_TRUE(!StatusOrErr); + return toString(StatusOrErr.takeError()); } static double convertToDoubleFromString(StringRef 
Str) { llvm::APFloat F(0.0); - EXPECT_FALSE(!F.convertFromString(Str, llvm::APFloat::rmNearestTiesToEven)); + auto StatusOrErr = + F.convertFromString(Str, llvm::APFloat::rmNearestTiesToEven); + EXPECT_FALSE(!StatusOrErr); + consumeError(StatusOrErr.takeError()); return F.convertToDouble(); } diff --git a/llvm/unittests/ADT/TripleTest.cpp b/llvm/unittests/ADT/TripleTest.cpp index d8123bbbfdf7a..ef7f82d268e2e 100644 --- a/llvm/unittests/ADT/TripleTest.cpp +++ b/llvm/unittests/ADT/TripleTest.cpp @@ -163,6 +163,13 @@ TEST(TripleTest, ParsedIDs) { EXPECT_EQ(Triple::UnknownOS, T.getOS()); EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment()); + T = Triple("powerpcspe-unknown-freebsd"); + EXPECT_EQ(Triple::ppc, T.getArch()); + EXPECT_EQ(Triple::PPCSubArch_spe, T.getSubArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::FreeBSD, T.getOS()); + EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment()); + T = Triple("arm-none-none-eabi"); EXPECT_EQ(Triple::arm, T.getArch()); EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); @@ -312,6 +319,12 @@ TEST(TripleTest, ParsedIDs) { EXPECT_EQ(Triple::AMDPAL, T.getOS()); EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment()); + T = Triple("ve-unknown-linux"); + EXPECT_EQ(Triple::ve, T.getArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::Linux, T.getOS()); + EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment()); + T = Triple("riscv32-unknown-unknown"); EXPECT_EQ(Triple::riscv32, T.getArch()); EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); @@ -715,6 +728,8 @@ TEST(TripleTest, Normalization) { Triple::normalize("i686-linux")); // i686-pc-linux-gnu EXPECT_EQ("arm-none-unknown-eabi", Triple::normalize("arm-none-eabi")); // arm-none-eabi + EXPECT_EQ("ve-unknown-linux", + Triple::normalize("ve-linux")); // ve-linux EXPECT_EQ("wasm32-unknown-wasi", Triple::normalize("wasm32-wasi")); // wasm32-unknown-wasi EXPECT_EQ("wasm64-unknown-wasi", diff --git a/llvm/unittests/CodeGen/MachineOperandTest.cpp b/llvm/unittests/CodeGen/MachineOperandTest.cpp index faa471f2260c7..7e60fab281545 100644 --- a/llvm/unittests/CodeGen/MachineOperandTest.cpp +++ b/llvm/unittests/CodeGen/MachineOperandTest.cpp @@ -310,7 +310,7 @@ TEST(MachineOperandTest, PrintMetadata) { std::string str; // Print a MachineOperand containing a metadata node. 
raw_string_ostream OS(str); - MO.print(OS, MST, LLT{}, /*PrintDef=*/false, /*IsStandalone=*/false, + MO.print(OS, MST, LLT{}, /*OpIdx*/~0U, /*PrintDef=*/false, /*IsStandalone=*/false, /*ShouldPrintRegisterTies=*/false, 0, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr); ASSERT_TRUE(OS.str() == "!0"); diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.h b/llvm/utils/TableGen/CodeGenDAGPatterns.h index c61b55052533a..2c081b670609d 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.h +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.h @@ -1195,12 +1195,6 @@ class CodeGenDAGPatterns { return F->second; } - typedef std::map<Record *, NodeXForm, LessRecordByID>::const_iterator - nx_iterator; - nx_iterator nx_begin() const { return SDNodeXForms.begin(); } - nx_iterator nx_end() const { return SDNodeXForms.end(); } - - const ComplexPattern &getComplexPattern(Record *R) const { auto F = ComplexPatterns.find(R); assert(F != ComplexPatterns.end() && "Unknown addressing mode!"); diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index b2e3903eda8b5..99b067d5b5270 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -5283,7 +5283,7 @@ void GlobalISelEmitter::run(raw_ostream &OS) { OS << Target.getName() << "InstructionSelector::CustomRendererFn\n" << Target.getName() << "InstructionSelector::CustomRenderers[] = {\n" - << " nullptr, // GICP_Invalid\n"; + << " nullptr, // GICR_Invalid\n"; for (const auto &Record : CustomRendererFns) OS << " &" << Target.getName() << "InstructionSelector::" << Record->getValueAsString("RendererFn") diff --git a/mlir/include/mlir/Dialect/GPU/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/GPUDialect.h index 1776ff7198052..a21b51487722e 100644 --- a/mlir/include/mlir/Dialect/GPU/GPUDialect.h +++ b/mlir/include/mlir/Dialect/GPU/GPUDialect.h @@ -53,6 +53,11 @@ class GPUDialect : public Dialect { /// 'gpu.kernel' attribute. static bool isKernel(Operation *op); + /// Returns the number of workgroup (thread, block) dimensions supported in + /// the GPU dialect. + // TODO(zinenko,herhut): consider generalizing this. + static unsigned getNumWorkgroupDimensions() { return 3; } + /// Returns the numeric value used to identify the workgroup memory address /// space. static unsigned getWorkgroupAddressSpace() { return 3; } diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td index b5b93e9b553b5..766ddbf202c25 100644 --- a/mlir/include/mlir/Dialect/GPU/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -117,6 +117,10 @@ def GPU_GPUFuncOp : GPU_Op<"func", [FunctionLike, IsolatedFromAbove, Symbol]> { ]; let extraClassDeclaration = [{ + /// Adds a workgroup attribution of the MemRef type with the given shape and + /// element type. + Value addWorkgroupAttribution(ArrayRef<int64_t> shape, Type elementType); + /// Returns `true` if the GPU function defined by this Op is a kernel, i.e. /// it is intended to be launched from host. bool isKernel() { diff --git a/mlir/include/mlir/Dialect/GPU/MemoryPromotion.h b/mlir/include/mlir/Dialect/GPU/MemoryPromotion.h new file mode 100644 index 0000000000000..09c1371708f25 --- /dev/null +++ b/mlir/include/mlir/Dialect/GPU/MemoryPromotion.h @@ -0,0 +1,29 @@ +//===- MemoryPromotion.h - Utilities for moving data across GPU -*- C++ -*-===// +// +// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header file declares the utility functions that generate IR copying +// the data between different levels of memory hierarchy. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_GPU_MEMORYPROMOTION_H +#define MLIR_DIALECT_GPU_MEMORYPROMOTION_H + +namespace mlir { + +namespace gpu { +class GPUFuncOp; +} + +/// Promotes a function argument to workgroup memory in the given function. The +/// copies will be inserted in the beginning and in the end of the function. +void promoteToWorkgroupMemory(gpu::GPUFuncOp op, unsigned arg); + +} // end namespace mlir + +#endif // MLIR_DIALECT_GPU_MEMORYPROMOTION_H diff --git a/mlir/include/mlir/IR/Attributes.h b/mlir/include/mlir/IR/Attributes.h index b8398580f61c9..64b8063bdcb58 100644 --- a/mlir/include/mlir/IR/Attributes.h +++ b/mlir/include/mlir/IR/Attributes.h @@ -215,6 +215,25 @@ class ArrayAttr : public Attribute::AttrBase<ArrayAttr, Attribute, +private: + template <typename AttrTy> + class attr_value_iterator final + : public llvm::mapped_iterator<ArrayAttr::iterator, AttrTy (*)(Attribute)> { + public: + explicit attr_value_iterator(iterator it) + : llvm::mapped_iterator<ArrayAttr::iterator, AttrTy (*)(Attribute)>( + it, [](Attribute attr) { return attr.cast<AttrTy>(); }) {} + AttrTy operator*() { return (*this->I).template cast<AttrTy>(); } + }; + +public: + template <typename AttrTy> + llvm::iterator_range<attr_value_iterator<AttrTy>> getAsRange() { + return llvm::make_range(attr_value_iterator<AttrTy>(begin()), + attr_value_iterator<AttrTy>(end())); + } }; //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/IR/Block.h b/mlir/include/mlir/IR/Block.h index c868148f95e2e..2d3eb18d72934 100644 --- a/mlir/include/mlir/IR/Block.h +++ b/mlir/include/mlir/IR/Block.h @@ -79,6 +79,11 @@ class Block : public IRObjectWithUseList, /// Add one value to the argument list. BlockArgument addArgument(Type type); + /// Insert one value at the position in the argument list indicated by the + /// given iterator. The existing arguments are shifted. The block is expected + /// not to have predecessors. + BlockArgument insertArgument(args_iterator it, Type type); + /// Add one argument to the argument list for each type specified in the list. iterator_range<args_iterator> addArguments(ArrayRef<Type> types); diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index b48930c4ddab0..1fbee9742e0d0 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -6,10 +6,11 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h" #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" -#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/StandardOps/Ops.h" #include "mlir/Dialect/VectorOps/VectorOps.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" @@ -31,6 +32,7 @@ #include "llvm/Support/ErrorHandling.h" using namespace mlir; +using namespace mlir::vector; template <typename T> static LLVM::LLVMType getPtrToElementType(T containerType, @@ -68,6 +70,17 @@ static Value insertOne(ConversionPatternRewriter &rewriter, rewriter.getI64ArrayAttr(pos)); } +// Helper that picks the proper sequence for inserting. 
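+// (For a destination of rank > 1 this emits a vector.insert at the given +// position; at rank 1 it lowers to a vector.insertelement with a constant +// index instead, mirroring the extractOne helper below.)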
+static Value insertOne(PatternRewriter &rewriter, Location loc, Value from, + Value into, int64_t offset) { + auto vectorType = into.getType().cast<VectorType>(); + if (vectorType.getRank() > 1) + return rewriter.create<InsertOp>(loc, from, into, offset); + return rewriter.create<vector::InsertElementOp>( + loc, vectorType, from, into, + rewriter.create<ConstantIndexOp>(loc, offset)); +} + // Helper that picks the proper sequence for extracting. static Value extractOne(ConversionPatternRewriter &rewriter, LLVMTypeConverter &lowering, Location loc, Value val, @@ -84,6 +97,32 @@ static Value extractOne(ConversionPatternRewriter &rewriter, rewriter.getI64ArrayAttr(pos)); } +// Helper that picks the proper sequence for extracting. +static Value extractOne(PatternRewriter &rewriter, Location loc, Value vector, + int64_t offset) { + auto vectorType = vector.getType().cast<VectorType>(); + if (vectorType.getRank() > 1) + return rewriter.create<ExtractOp>(loc, vector, offset); + return rewriter.create<vector::ExtractElementOp>( + loc, vectorType.getElementType(), vector, + rewriter.create<ConstantIndexOp>(loc, offset)); +} + +// Helper that returns a subset of `arrayAttr` as a vector of int64_t. +// TODO(rriddle): Better support for attribute subtype forwarding + slicing. +static SmallVector<int64_t, 4> getI64SubArray(ArrayAttr arrayAttr, + unsigned dropFront = 0, + unsigned dropBack = 0) { + assert(arrayAttr.size() > dropFront + dropBack && "Out of bounds"); + auto range = arrayAttr.getAsRange<IntegerAttr>(); + SmallVector<int64_t, 4> res; + res.reserve(arrayAttr.size() - dropFront - dropBack); + for (auto it = range.begin() + dropFront, eit = range.end() - dropBack; + it != eit; ++it) + res.push_back((*it).getValue().getSExtValue()); + return res; +} + class VectorBroadcastOpConversion : public LLVMOpLowering { public: explicit VectorBroadcastOpConversion(MLIRContext *context, @@ -462,6 +501,139 @@ class VectorInsertOpConversion : public LLVMOpLowering { } }; + +// When ranks are different, InsertStridedSlice needs to extract a properly +// ranked vector from the destination vector into which to insert. This pattern +// only takes care of this part and forwards the rest of the conversion to +// another pattern that converts InsertStridedSlice for operands of the same +// rank. +// +// RewritePattern for InsertStridedSliceOp where source and destination vectors +// have different ranks. In this case: +// 1. the proper subvector is extracted from the destination vector +// 2. a new InsertStridedSlice op is created to insert the source in the +// destination subvector +// 3. the destination subvector is inserted back in the proper place +// 4. the op is replaced by the result of step 3. +// The new InsertStridedSlice from step 2. will be picked up by a +// `VectorInsertStridedSliceOpSameRankRewritePattern`. +class VectorInsertStridedSliceOpDifferentRankRewritePattern + : public OpRewritePattern<InsertStridedSliceOp> { +public: + using OpRewritePattern<InsertStridedSliceOp>::OpRewritePattern; + + PatternMatchResult matchAndRewrite(InsertStridedSliceOp op, + PatternRewriter &rewriter) const override { + auto srcType = op.getSourceVectorType(); + auto dstType = op.getDestVectorType(); + + if (op.offsets().getValue().empty()) + return matchFailure(); + + auto loc = op.getLoc(); + int64_t rankDiff = dstType.getRank() - srcType.getRank(); + assert(rankDiff >= 0); + if (rankDiff == 0) + return matchFailure(); + + int64_t rankRest = dstType.getRank() - rankDiff; + // Extract / insert the subvector of matching rank and InsertStridedSlice + // on it. 
+ Value extracted = + rewriter.create<ExtractOp>(loc, op.dest(), + getI64SubArray(op.offsets(), /*dropFront=*/0, + /*dropBack=*/rankRest)); + // A different pattern will kick in for InsertStridedSlice with matching + // ranks. + auto stridedSliceInnerOp = rewriter.create<InsertStridedSliceOp>( + loc, op.source(), extracted, + getI64SubArray(op.offsets(), /*dropFront=*/rankDiff), + getI64SubArray(op.strides(), /*dropFront=*/rankDiff)); + rewriter.replaceOpWithNewOp<InsertOp>( + op, stridedSliceInnerOp.getResult(), op.dest(), + getI64SubArray(op.offsets(), /*dropFront=*/0, + /*dropBack=*/rankRest)); + return matchSuccess(); + } +}; + +// RewritePattern for InsertStridedSliceOp where source and destination vectors +// have the same rank. For each slice of the source vector along its most major +// dimension: +// 1. the slice is extracted from the source (an element once rank 1 is reached) +// 2. if the slice is still a vector, a rank-reduced InsertStridedSlice op +// inserts it into the matching subvector extracted from the destination; +// this is handled by recursing on this same pattern +// 3. the result is inserted back into the destination vector at the proper +// offset. +// Once all slices along the major dimension are processed, the op is replaced +// by the fully updated destination vector. +class VectorInsertStridedSliceOpSameRankRewritePattern + : public OpRewritePattern<InsertStridedSliceOp> { +public: + using OpRewritePattern<InsertStridedSliceOp>::OpRewritePattern; + + PatternMatchResult matchAndRewrite(InsertStridedSliceOp op, + PatternRewriter &rewriter) const override { + auto srcType = op.getSourceVectorType(); + auto dstType = op.getDestVectorType(); + + if (op.offsets().getValue().empty()) + return matchFailure(); + + int64_t rankDiff = dstType.getRank() - srcType.getRank(); + assert(rankDiff >= 0); + if (rankDiff != 0) + return matchFailure(); + + if (srcType == dstType) { + rewriter.replaceOp(op, op.source()); + return matchSuccess(); + } + + int64_t offset = + op.offsets().getValue().front().cast<IntegerAttr>().getInt(); + int64_t size = srcType.getShape().front(); + int64_t stride = + op.strides().getValue().front().cast<IntegerAttr>().getInt(); + + auto loc = op.getLoc(); + Value res = op.dest(); + // For each slice of the source vector along the most major dimension. + for (int64_t off = offset, e = offset + size * stride, idx = 0; off < e; + off += stride, ++idx) { + // 1. extract the proper subvector (or element) from source + Value extractedSource = extractOne(rewriter, loc, op.source(), idx); + if (extractedSource.getType().isa<VectorType>()) { + // 2. If we have a vector, extract the proper subvector from destination. + // Otherwise we are at the element level and there is no need to recurse. + Value extractedDest = extractOne(rewriter, loc, op.dest(), off); + // 3. Reduce the problem to lowering a new InsertStridedSlice op with + // smaller rank. + InsertStridedSliceOp insertStridedSliceOp = + rewriter.create<InsertStridedSliceOp>( + loc, extractedSource, extractedDest, + getI64SubArray(op.offsets(), /*dropFront=*/1), + getI64SubArray(op.strides(), /*dropFront=*/1)); + // Call matchAndRewrite recursively from within the pattern. This + // circumvents the current limitation that a given pattern cannot + // be called multiple times by the PatternRewrite infrastructure (to + // avoid infinite recursion, but in this case, infinite recursion + // cannot happen because the rank is strictly decreasing). + // TODO(rriddle, nicolasvasilache) Implement something like a hook for + // a potential function that must decrease and allow the same pattern + // multiple times. + auto success = matchAndRewrite(insertStridedSliceOp, rewriter); + (void)success; + assert(success && "Unexpected failure"); + extractedSource = insertStridedSliceOp; + } + // 4. 
Insert the extractedSource into the res vector. + res = insertOne(rewriter, loc, extractedSource, res, off); + } + + rewriter.replaceOp(op, res); + return matchSuccess(); + } +}; + class VectorOuterProductOpConversion : public LLVMOpLowering { public: explicit VectorOuterProductOpConversion(MLIRContext *context, @@ -723,15 +895,71 @@ class VectorPrintOpConversion : public LLVMOpLowering { } }; +/// Progressive lowering of StridedSliceOp to either: +/// 1. extractelement + insertelement for the 1-D case +/// 2. extract + optional strided_slice + insert for the n-D case. +class VectorStridedSliceOpConversion : public OpRewritePattern<StridedSliceOp> { +public: + using OpRewritePattern<StridedSliceOp>::OpRewritePattern; + + PatternMatchResult matchAndRewrite(StridedSliceOp op, + PatternRewriter &rewriter) const override { + auto dstType = op.getResult().getType().cast<VectorType>(); + + assert(!op.offsets().getValue().empty() && "Unexpected empty offsets"); + + int64_t offset = + op.offsets().getValue().front().cast<IntegerAttr>().getInt(); + int64_t size = op.sizes().getValue().front().cast<IntegerAttr>().getInt(); + int64_t stride = + op.strides().getValue().front().cast<IntegerAttr>().getInt(); + + auto loc = op.getLoc(); + auto elemType = dstType.getElementType(); + assert(elemType.isIntOrIndexOrFloat()); + Value zero = rewriter.create<ConstantOp>(loc, elemType, + rewriter.getZeroAttr(elemType)); + Value res = rewriter.create<SplatOp>(loc, dstType, zero); + for (int64_t off = offset, e = offset + size * stride, idx = 0; off < e; + off += stride, ++idx) { + Value extracted = extractOne(rewriter, loc, op.vector(), off); + if (op.offsets().getValue().size() > 1) { + StridedSliceOp stridedSliceOp = rewriter.create<StridedSliceOp>( + loc, extracted, getI64SubArray(op.offsets(), /*dropFront=*/1), + getI64SubArray(op.sizes(), /*dropFront=*/1), + getI64SubArray(op.strides(), /*dropFront=*/1)); + // Call matchAndRewrite recursively from within the pattern. This + // circumvents the current limitation that a given pattern cannot + // be called multiple times by the PatternRewrite infrastructure (to + // avoid infinite recursion, but in this case, infinite recursion + // cannot happen because the rank is strictly decreasing). + // TODO(rriddle, nicolasvasilache) Implement something like a hook for + // a potential function that must decrease and allow the same pattern + // multiple times. + auto success = matchAndRewrite(stridedSliceOp, rewriter); + (void)success; + assert(success && "Unexpected failure"); + extracted = stridedSliceOp; + } + res = insertOne(rewriter, loc, extracted, res, idx); + } + rewriter.replaceOp(op, {res}); + return matchSuccess(); + } +}; + /// Populate the given list with patterns that convert from Vector to LLVM. 
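/// A typical use is inside a conversion pass; a minimal sketch, assuming the usual Std+Vector-to-LLVM setup of this era (names illustrative, not part of this patch): /// OwningRewritePatternList patterns; /// LLVMTypeConverter converter(module.getContext()); /// populateVectorToLLVMConversionPatterns(converter, patterns); /// populateStdToLLVMConversionPatterns(converter, patterns); /// if (failed(applyPartialConversion(module, target, patterns, &converter))) /// signalPassFailure();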
void mlir::populateVectorToLLVMConversionPatterns( LLVMTypeConverter &converter, OwningRewritePatternList &patterns) { + MLIRContext *ctx = converter.getDialect()->getContext(); + patterns.insert<VectorInsertStridedSliceOpDifferentRankRewritePattern, + VectorInsertStridedSliceOpSameRankRewritePattern, + VectorStridedSliceOpConversion>(ctx); patterns.insert<VectorBroadcastOpConversion, VectorShuffleOpConversion, VectorExtractElementOpConversion, VectorExtractOpConversion, VectorInsertElementOpConversion, VectorInsertOpConversion, VectorOuterProductOpConversion, VectorTypeCastOpConversion, - VectorPrintOpConversion>(converter.getDialect()->getContext(), - converter); + VectorPrintOpConversion>(ctx, converter); } namespace { diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt index 6fe45ba49ef56..dbf05ac6ace95 100644 --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -2,9 +2,25 @@ add_llvm_library(MLIRGPU IR/GPUDialect.cpp IR/DialectRegistration.cpp Transforms/KernelOutlining.cpp + Transforms/MemoryPromotion.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU ) -add_dependencies(MLIRGPU MLIRGPUOpsIncGen MLIRIR MLIRLLVMIR LLVMSupport) -target_link_libraries(MLIRGPU MLIRIR MLIRLLVMIR MLIRStandardOps LLVMSupport) +add_dependencies(MLIRGPU + MLIRGPUOpsIncGen + MLIREDSC + MLIRIR + MLIRLLVMIR + MLIRLoopOps + MLIRSupport + MLIRTransformUtils + LLVMSupport) +target_link_libraries(MLIRGPU + MLIREDSC + MLIRIR + MLIRLLVMIR + MLIRLoopOps + MLIRSupport + MLIRTransformUtils + LLVMSupport) diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index bda8032fc21a6..32d7fae65d9ce 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -593,6 +593,24 @@ LogicalResult verify(LaunchFuncOp op) { // GPUFuncOp //===----------------------------------------------------------------------===// +/// Adds a workgroup attribution to "op" of the MemRef type with the given shape +/// and element type. +Value GPUFuncOp::addWorkgroupAttribution(ArrayRef<int64_t> shape, + Type elementType) { + unsigned pos = getNumFuncArguments() + getNumWorkgroupAttributions(); + Block &bodyBlock = body().front(); + Value attribution = bodyBlock.insertArgument( + std::next(bodyBlock.args_begin(), pos), + MemRefType::get(shape, elementType, /*affineMapComposition=*/{}, + GPUDialect::getWorkgroupAddressSpace())); + auto numWorkgroupBuffersAttr = + getAttrOfType<IntegerAttr>(getNumWorkgroupAttributionsAttrName()); + setAttr(getNumWorkgroupAttributionsAttrName(), + IntegerAttr::get(numWorkgroupBuffersAttr.getType(), + numWorkgroupBuffersAttr.getValue() + 1)); + return attribution; +} + void GPUFuncOp::build(Builder *builder, OperationState &result, StringRef name, FunctionType type, ArrayRef<Type> workgroupAttributions, ArrayRef<Type> privateAttributions, diff --git a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp new file mode 100644 index 0000000000000..f01a430a216da --- /dev/null +++ b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp @@ -0,0 +1,173 @@ +//===- MemoryPromotion.cpp - Utilities for moving data across GPU memories ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements utilities that allow one to create IR moving the data +// across different levels of the GPU memory hierarchy. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/GPU/MemoryPromotion.h" +#include "mlir/Dialect/GPU/GPUDialect.h" +#include "mlir/Dialect/LoopOps/LoopOps.h" +#include "mlir/EDSC/Builders.h" +#include "mlir/EDSC/Helpers.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/Functional.h" +#include "mlir/Transforms/LoopUtils.h" + +using namespace mlir; +using namespace mlir::gpu; + +/// Returns the textual name of a GPU dimension. +static StringRef getDimName(unsigned dim) { + if (dim == 0) + return "x"; + if (dim == 1) + return "y"; + if (dim == 2) + return "z"; + + llvm_unreachable("dimension ID overflow"); +} + +/// Emits the (imperfect) loop nest performing the copy between "from" and "to" +/// values using the bounds derived from the "from" value. Emits at least +/// GPUDialect::getNumWorkgroupDimensions() loops, completing the nest with +/// single-iteration loops. Maps the innermost loops to thread dimensions, in +/// reverse order to enable access coalescing in the innermost loop. +static void insertCopyLoops(OpBuilder &builder, Location loc, + edsc::MemRefView &bounds, Value from, Value to) { + // Create EDSC handles for bounds. + unsigned rank = bounds.rank(); + SmallVector<edsc::ValueHandle, 4> lbs, ubs, steps; + + // Make sure we have enough loops to use all thread dimensions; these trivial + // loops should be outermost and therefore inserted first. + if (rank < GPUDialect::getNumWorkgroupDimensions()) { + unsigned extraLoops = GPUDialect::getNumWorkgroupDimensions() - rank; + edsc::ValueHandle zero = edsc::intrinsics::constant_index(0); + edsc::ValueHandle one = edsc::intrinsics::constant_index(1); + lbs.resize(extraLoops, zero); + ubs.resize(extraLoops, one); + steps.resize(extraLoops, one); + } + + // Add existing bounds. + lbs.append(bounds.getLbs().begin(), bounds.getLbs().end()); + ubs.append(bounds.getUbs().begin(), bounds.getUbs().end()); + + // Emit constant operations for steps. + steps.reserve(lbs.size()); + llvm::transform( + bounds.getSteps(), std::back_inserter(steps), + [](int64_t step) { return edsc::intrinsics::constant_index(step); }); + + // Obtain thread identifiers and block sizes, necessary to map to them. + auto indexType = builder.getIndexType(); + SmallVector<Value, 3> threadIds, blockDims; + for (unsigned i = 0; i < 3; ++i) { + auto dimName = builder.getStringAttr(getDimName(i)); + threadIds.push_back( + builder.create<gpu::ThreadIdOp>(loc, indexType, dimName)); + blockDims.push_back( + builder.create<gpu::BlockDimOp>(loc, indexType, dimName)); + } + + // Produce the loop nest with copies. + auto ivs = edsc::makeIndexHandles(lbs.size()); + auto ivPtrs = + edsc::makeHandlePointers(MutableArrayRef<edsc::IndexHandle>(ivs)); + edsc::LoopNestBuilder(ivPtrs, lbs, ubs, steps)([&]() { + auto activeIvs = llvm::makeArrayRef(ivs).take_back(rank); + edsc::StdIndexedValue fromHandle(from), toHandle(to); + toHandle(activeIvs) = fromHandle(activeIvs); + }); + + // Map the innermost loops to threads in reverse order. + for (auto en : + llvm::enumerate(llvm::reverse(llvm::makeArrayRef(ivs).take_back( + GPUDialect::getNumWorkgroupDimensions())))) { + auto loop = cast<loop::ForOp>( + en.value().getValue().getParentRegion()->getParentOp()); + mapLoopToProcessorIds(loop, {threadIds[en.index()]}, + {blockDims[en.index()]}); + } +} + +/// Emits the loop nests performing the copy to the designated location in the +/// beginning of the region, and from the designated location immediately before +/// the terminator of the first block of the region. The region is expected to +/// have one block. 
This boils down to the following structure +/// +/// ^bb(...): +/// +/// for %arg0 = ... to ... step ... { +/// ... +/// for %argN = ... to ... step ... { +/// %0 = load %from[%arg0, ..., %argN] +/// store %0, %to[%arg0, ..., %argN] +/// } +/// ... +/// } +/// gpu.barrier +/// <... original body ...> +/// gpu.barrier +/// for %arg0 = ... to ... step ... { +/// ... +/// for %argN = ... to ... step ... { +/// %1 = load %to[%arg0, ..., %argN] +/// store %1, %from[%arg0, ..., %argN] +/// } +/// ... +/// } +/// +/// Inserts the barriers unconditionally since different threads may be copying +/// values and reading them. An analysis would be required to eliminate barriers +/// in cases where a value is only used by the thread that copies it. Both copies +/// are inserted unconditionally; an analysis would be required to copy only +/// live-in and live-out values when necessary. This copies the entire memref +/// pointed to by "from". In case a smaller block would be sufficient, the +/// caller can create a subview of the memref and promote it instead. +static void insertCopies(Region &region, Location loc, Value from, Value to) { + auto fromType = from.getType().cast<MemRefType>(); + auto toType = to.getType().cast<MemRefType>(); + (void)fromType; + (void)toType; + assert(fromType.getShape() == toType.getShape()); + assert(fromType.getRank() != 0); + assert(has_single_element(region) && + "unstructured control flow not supported"); + + OpBuilder builder(region.getContext()); + builder.setInsertionPointToStart(&region.front()); + + edsc::ScopedContext edscContext(builder, loc); + edsc::MemRefView fromView(from); + insertCopyLoops(builder, loc, fromView, from, to); + builder.create<gpu::BarrierOp>(loc); + + builder.setInsertionPoint(&region.front().back()); + builder.create<gpu::BarrierOp>(loc); + insertCopyLoops(builder, loc, fromView, to, from); +} + +/// Promotes a function argument to workgroup memory in the given function. The +/// copies will be inserted in the beginning and in the end of the function. +void mlir::promoteToWorkgroupMemory(GPUFuncOp op, unsigned arg) { + Value value = op.getArgument(arg); + auto type = value.getType().dyn_cast<MemRefType>(); + assert(type && type.hasStaticShape() && "can only promote memrefs"); + + Value attribution = + op.addWorkgroupAttribution(type.getShape(), type.getElementType()); + + // Replace the uses first since only the original uses are currently present. + // Then insert the copies. + value.replaceAllUsesWith(attribution); + insertCopies(op.getBody(), op.getLoc(), value, attribution); +} diff --git a/mlir/lib/IR/Block.cpp b/mlir/lib/IR/Block.cpp index b0ada9981a8a7..2757c505555a1 100644 --- a/mlir/lib/IR/Block.cpp +++ b/mlir/lib/IR/Block.cpp @@ -179,6 +179,20 @@ void Block::eraseArgument(unsigned index, bool updatePredTerms) { } } +/// Insert one value at the given position in the argument list. The existing +/// arguments are shifted. The block is expected not to have predecessors. +BlockArgument Block::insertArgument(args_iterator it, Type type) { + assert(llvm::empty(getPredecessors()) && + "cannot insert arguments to blocks with predecessors"); + + // Use the args_iterator (on the BlockArgListType) to compute the insertion + // iterator in the underlying argument storage. 
+ size_t distance = std::distance(args_begin(), it); + auto arg = BlockArgument::create(type, this); + arguments.insert(std::next(arguments.begin(), distance), arg); + return arg; +} + //===----------------------------------------------------------------------===// // Terminator management //===----------------------------------------------------------------------===// diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index 1725a0b7c75c3..e01a23343652f 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -423,3 +423,104 @@ func @vector_print_vector(%arg0: vector<2x2xf32>) { // CHECK: llvm.call @print_close() : () -> () // CHECK: llvm.call @print_close() : () -> () // CHECK: llvm.call @print_newline() : () -> () + + +func @strided_slice(%arg0: vector<4xf32>, %arg1: vector<4x8xf32>, %arg2: vector<4x8x16xf32>) { +// CHECK-LABEL: llvm.func @strided_slice( + %0 = vector.strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4xf32> to vector<2xf32> +// CHECK: llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float +// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2xf32>) : !llvm<"<2 x float>"> +// CHECK: llvm.mlir.constant(2 : index) : !llvm.i64 +// CHECK: llvm.extractelement %{{.*}}[%{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> +// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: llvm.insertelement %{{.*}}, %{{.*}}[%{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK: llvm.mlir.constant(3 : index) : !llvm.i64 +// CHECK: llvm.extractelement %{{.*}}[%{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> +// CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 +// CHECK: llvm.insertelement %{{.*}}, %{{.*}}[%{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> + + %1 = vector.strided_slice %arg1 {offsets = [2], sizes = [2], strides = [1]} : vector<4x8xf32> to vector<2x8xf32> +// CHECK: llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float +// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2x8xf32>) : !llvm<"[2 x <8 x float>]"> +// CHECK: llvm.extractvalue %{{.*}}[2] : !llvm<"[4 x <8 x float>]"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm<"[2 x <8 x float>]"> +// CHECK: llvm.extractvalue %{{.*}}[3] : !llvm<"[4 x <8 x float>]"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm<"[2 x <8 x float>]"> + + %2 = vector.strided_slice %arg1 {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x8xf32> to vector<2x2xf32> +// CHECK: llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float +// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2x2xf32>) : !llvm<"[2 x <2 x float>]"> +// +// Subvector vector<8xf32> @2 +// CHECK: llvm.extractvalue {{.*}}[2] : !llvm<"[4 x <8 x float>]"> +// CHECK: llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float +// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2xf32>) : !llvm<"<2 x float>"> +// CHECK: llvm.mlir.constant(2 : index) : !llvm.i64 +// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<8 x float>"> +// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK: llvm.mlir.constant(3 : index) : !llvm.i64 +// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<8 x float>"> +// CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : 
!llvm<"[2 x <2 x float>]"> +// +// Subvector vector<8xf32> @3 +// CHECK: llvm.extractvalue {{.*}}[3] : !llvm<"[4 x <8 x float>]"> +// CHECK: llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float +// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2xf32>) : !llvm<"<2 x float>"> +// CHECK: llvm.mlir.constant(2 : index) : !llvm.i64 +// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<8 x float>"> +// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK: llvm.mlir.constant(3 : index) : !llvm.i64 +// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<8 x float>"> +// CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[2 x <2 x float>]"> + + return +} + +func @insert_strided_slice(%a: vector<2x2xf32>, %b: vector<4x4xf32>, %c: vector<4x4x4xf32>) { +// CHECK-LABEL: @insert_strided_slice + + %0 = vector.insert_strided_slice %b, %c {offsets = [2, 0, 0], strides = [1, 1]} : vector<4x4xf32> into vector<4x4x4xf32> +// CHECK: llvm.extractvalue {{.*}}[2] : !llvm<"[4 x [4 x <4 x float>]]"> +// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[4 x [4 x <4 x float>]]"> + + %1 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32> +// +// Subvector vector<2xf32> @0 into vector<4xf32> @2 +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[2 x <2 x float>]"> +// CHECK-NEXT: llvm.extractvalue {{.*}}[2] : !llvm<"[4 x <4 x float>]"> +// Element @0 -> element @2 +// CHECK-NEXT: llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK-NEXT: llvm.mlir.constant(2 : index) : !llvm.i64 +// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> +// Element @1 -> element @3 +// CHECK-NEXT: llvm.mlir.constant(1 : index) : !llvm.i64 +// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK-NEXT: llvm.mlir.constant(3 : index) : !llvm.i64 +// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> +// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[4 x <4 x float>]"> +// +// Subvector vector<2xf32> @1 into vector<4xf32> @3 +// CHECK: llvm.extractvalue {{.*}}[1] : !llvm<"[2 x <2 x float>]"> +// CHECK-NEXT: llvm.extractvalue {{.*}}[3] : !llvm<"[4 x <4 x float>]"> +// Element @0 -> element @2 +// CHECK-NEXT: llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK-NEXT: llvm.mlir.constant(2 : index) : !llvm.i64 +// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> +// Element @1 -> element @3 +// CHECK-NEXT: llvm.mlir.constant(1 : index) : !llvm.i64 +// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK-NEXT: llvm.mlir.constant(3 : index) : !llvm.i64 +// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> +// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm<"[4 x <4 x float>]"> + + return +} + diff --git a/mlir/test/Dialect/GPU/promotion.mlir b/mlir/test/Dialect/GPU/promotion.mlir new file mode 100644 index 0000000000000..c06174e0fcded --- /dev/null +++ b/mlir/test/Dialect/GPU/promotion.mlir @@ -0,0 +1,119 @@ 
+// RUN: mlir-opt -test-gpu-memory-promotion -split-input-file %s | FileCheck %s + +module @foo attributes {gpu.kernel_module} { + // Verify that the attribution was indeed introduced. + // CHECK-LABEL: @memref3d + // CHECK-SAME: (%[[arg:.*]]: memref<5x4xf32> + // CHECK-SAME: workgroup(%[[promoted:.*]] : memref<5x4xf32, 3>) + gpu.func @memref3d(%arg0: memref<5x4xf32> {gpu.test_promote_workgroup}) kernel { + // Verify that loop bounds are emitted; the order does not matter. + // CHECK-DAG: %[[c1:.*]] = constant 1 + // CHECK-DAG: %[[c4:.*]] = constant 4 + // CHECK-DAG: %[[c5:.*]] = constant 5 + // CHECK-DAG: %[[tx:.*]] = "gpu.thread_id"() {dimension = "x"} + // CHECK-DAG: %[[ty:.*]] = "gpu.thread_id"() {dimension = "y"} + // CHECK-DAG: %[[tz:.*]] = "gpu.thread_id"() {dimension = "z"} + // CHECK-DAG: %[[bdx:.*]] = "gpu.block_dim"() {dimension = "x"} + // CHECK-DAG: %[[bdy:.*]] = "gpu.block_dim"() {dimension = "y"} + // CHECK-DAG: %[[bdz:.*]] = "gpu.block_dim"() {dimension = "z"} + + // Verify that loops for the copy are emitted. We only check the number of + // loops here since their bounds are produced by mapLoopToProcessorIds, + // tested separately. + // CHECK: loop.for %[[i0:.*]] = + // CHECK: loop.for %[[i1:.*]] = + // CHECK: loop.for %[[i2:.*]] = + + // Verify that the copy is emitted and uses only the last two loops. + // CHECK: %[[v:.*]] = load %[[arg]][%[[i1]], %[[i2]]] + // CHECK: store %[[v]], %[[promoted]][%[[i1]], %[[i2]]] + + // Verify that the use has been rewritten. + // CHECK: "use"(%[[promoted]]) : (memref<5x4xf32, 3>) + "use"(%arg0) : (memref<5x4xf32>) -> () + + + // Verify that loops for the copy are emitted. We only check the number of + // loops here since their bounds are produced by mapLoopToProcessorIds, + // tested separately. + // CHECK: loop.for %[[i0:.*]] = + // CHECK: loop.for %[[i1:.*]] = + // CHECK: loop.for %[[i2:.*]] = + + // Verify that the copy is emitted and uses only the last two loops. + // CHECK: %[[v:.*]] = load %[[promoted]][%[[i1]], %[[i2]]] + // CHECK: store %[[v]], %[[arg]][%[[i1]], %[[i2]]] + gpu.return + } +} + +// ----- + +module @foo attributes {gpu.kernel_module} { + // Verify that the attribution was indeed introduced. + // CHECK-LABEL: @memref5d + // CHECK-SAME: (%[[arg:.*]]: memref<8x7x6x5x4xf32> + // CHECK-SAME: workgroup(%[[promoted:.*]] : memref<8x7x6x5x4xf32, 3>) + gpu.func @memref5d(%arg0: memref<8x7x6x5x4xf32> {gpu.test_promote_workgroup}) kernel { + // Verify that loop bounds are emitted; the order does not matter. + // CHECK-DAG: %[[c0:.*]] = constant 0 + // CHECK-DAG: %[[c1:.*]] = constant 1 + // CHECK-DAG: %[[c4:.*]] = constant 4 + // CHECK-DAG: %[[c5:.*]] = constant 5 + // CHECK-DAG: %[[c6:.*]] = constant 6 + // CHECK-DAG: %[[c7:.*]] = constant 7 + // CHECK-DAG: %[[c8:.*]] = constant 8 + // CHECK-DAG: %[[tx:.*]] = "gpu.thread_id"() {dimension = "x"} + // CHECK-DAG: %[[ty:.*]] = "gpu.thread_id"() {dimension = "y"} + // CHECK-DAG: %[[tz:.*]] = "gpu.thread_id"() {dimension = "z"} + // CHECK-DAG: %[[bdx:.*]] = "gpu.block_dim"() {dimension = "x"} + // CHECK-DAG: %[[bdy:.*]] = "gpu.block_dim"() {dimension = "y"} + // CHECK-DAG: %[[bdz:.*]] = "gpu.block_dim"() {dimension = "z"} + + // Verify that loops for the copy are emitted. + // CHECK: loop.for %[[i0:.*]] = + // CHECK: loop.for %[[i1:.*]] = + // CHECK: loop.for %[[i2:.*]] = + // CHECK: loop.for %[[i3:.*]] = + // CHECK: loop.for %[[i4:.*]] = + + // Verify that the copy is emitted. 
+    // CHECK: %[[v:.*]] = load %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
+    // CHECK: store %[[v]], %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
+
+    // Verify that the use has been rewritten.
+    // CHECK: "use"(%[[promoted]]) : (memref<8x7x6x5x4xf32, 3>)
+    "use"(%arg0) : (memref<8x7x6x5x4xf32>) -> ()
+
+    // Verify that the loops for the copy back are emitted.
+    // CHECK: loop.for %[[i0:.*]] =
+    // CHECK: loop.for %[[i1:.*]] =
+    // CHECK: loop.for %[[i2:.*]] =
+    // CHECK: loop.for %[[i3:.*]] =
+    // CHECK: loop.for %[[i4:.*]] =
+
+    // Verify that the copy back is emitted.
+    // CHECK: %[[v:.*]] = load %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
+    // CHECK: store %[[v]], %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
+    gpu.return
+  }
+}
+
+// -----
+
+module @foo attributes {gpu.kernel_module} {
+  // Check that a new attribution is inserted alongside the existing ones.
+  // CHECK-LABEL: @insert
+  // CHECK-SAME: (%{{.*}}: memref<4xf32>
+  // CHECK-SAME: workgroup(%{{.*}}: memref<1x1xf64, 3>
+  // CHECK-SAME: %[[wg2:.*]] : memref<4xf32, 3>)
+  // CHECK-SAME: private(%{{.*}}: memref<1x1xi64, 5>)
+  gpu.func @insert(%arg0: memref<4xf32> {gpu.test_promote_workgroup})
+      workgroup(%arg1: memref<1x1xf64, 3>)
+      private(%arg2: memref<1x1xi64, 5>)
+      kernel {
+    // CHECK: "use"(%[[wg2]])
+    "use"(%arg0) : (memref<4xf32>) -> ()
+    gpu.return
+  }
+}
diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp
index c776ffe12bdd7..64e1b7094df3c 100644
--- a/mlir/test/EDSC/builder-api-test.cpp
+++ b/mlir/test/EDSC/builder-api-test.cpp
@@ -724,9 +724,10 @@ TEST_FUNC(indirect_access) {
   // clang-format on
 
   // clang-format off
-  // CHECK-LABEL: func @indirect_access(
-  // CHECK: [[B:%.*]] = affine.load
-  // CHECK: [[D:%.*]] = affine.load
+  // CHECK-LABEL: func @indirect_access
+  // CHECK-SAME: (%[[ARG0:.*]]: memref, %[[ARG1:.*]]: memref, %[[ARG2:.*]]: memref, %[[ARG3:.*]]: memref)
+  // CHECK-DAG: [[B:%.*]] = affine.load %[[ARG1]]
+  // CHECK-DAG: [[D:%.*]] = affine.load %[[ARG3]]
   // CHECK: load %{{.*}}{{\[}}[[B]]{{\]}}
   // CHECK: store %{{.*}}, %{{.*}}{{\[}}[[D]]{{\]}}
   // clang-format on
diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir
index a6c2326290752..16ee00923d258 100644
--- a/mlir/test/Transforms/canonicalize.mlir
+++ b/mlir/test/Transforms/canonicalize.mlir
@@ -703,28 +703,28 @@ func @view(%arg0 : index) {
   %c15 = constant 15 : index
 
   // Test: fold constant sizes and offset, update map with static stride/offset.
-  // CHECK: std.view %0[][] : memref<2048xi8> to memref<7x11xf32, #[[VIEW_MAP0]]>
+  // CHECK: std.view %[[ALLOC_MEM]][][] : memref<2048xi8> to memref<7x11xf32, #[[VIEW_MAP0]]>
   %1 = view %0[%c15][%c7, %c11] : memref<2048xi8> to memref
   load %1[%c0, %c0] : memref
 
   // Test: fold constant sizes but not offset, update map with static stride.
   // Test that we do not fold a dynamic dim that is not produced by a constant.
-  // CHECK: std.view %0[%arg0][] : memref<2048xi8> to memref<7x11xf32, #[[VIEW_MAP1]]>
+  // CHECK: std.view %[[ALLOC_MEM]][%arg0][] : memref<2048xi8> to memref<7x11xf32, #[[VIEW_MAP1]]>
   %2 = view %0[%arg0][%c7, %c11] : memref<2048xi8> to memref
   load %2[%c0, %c0] : memref
 
   // Test: fold constant offset but not sizes, update map with constant offset.
   // Test that we fold the constant offset but not the dynamic dims.
-  // CHECK: std.view %0[][%arg0, %arg0] : memref<2048xi8> to memref
+  // CHECK: std.view %[[ALLOC_MEM]][][%arg0, %arg0] : memref<2048xi8> to memref
   %3 = view %0[%c15][%arg0, %arg0] : memref<2048xi8> to memref
   load %3[%c0, %c0] : memref
 
   // Test: fold one constant dim, no offset, should update with constant
   // stride on dim 1, but leave dynamic stride on dim 0.
-  // CHECK: std.view %0[][%arg0, %arg0] : memref<2048xi8> to memref
+  // CHECK: std.view %[[ALLOC_MEM]][][%arg0, %arg0] : memref<2048xi8> to memref
   %4 = view %0[][%arg0, %arg0, %c7] : memref<2048xi8> to memref
   load %4[%c0, %c0, %c0] : memref
 
@@ -736,7 +736,7 @@ func @view(%arg0 : index) {
   load %5[%c0, %c0] : memref
 
   // Test: folding a static alloc and memref_cast into a view.
-  // CHECK: std.view %0[][%c15, %c7] : memref<2048xi8> to memref
+  // CHECK: std.view %[[ALLOC_MEM]][][%c15, %c7] : memref<2048xi8> to memref
   %6 = memref_cast %0 : memref<2048xi8> to memref
   %7 = view %6[%c15][%c7] : memref to memref
   load %7[%c0, %c0] : memref
diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt
index b6338e1d167cd..ac4a4930e5a51 100644
--- a/mlir/test/lib/Transforms/CMakeLists.txt
+++ b/mlir/test/lib/Transforms/CMakeLists.txt
@@ -2,6 +2,7 @@ add_llvm_library(MLIRTestTransforms
   TestCallGraph.cpp
   TestConstantFold.cpp
   TestLoopFusion.cpp
+  TestGpuMemoryPromotion.cpp
   TestInlining.cpp
   TestLinalgTransforms.cpp
   TestLiveness.cpp
@@ -26,6 +27,8 @@ add_dependencies(MLIRTestTransforms MLIRTestVectorTransformPatternsIncGen)
 target_link_libraries(MLIRTestTransforms
   MLIRAffineOps
   MLIRAnalysis
+  MLIREDSC
+  MLIRGPU
   MLIRLoopOps
   MLIRPass
   MLIRTestDialect
diff --git a/mlir/test/lib/Transforms/TestGpuMemoryPromotion.cpp b/mlir/test/lib/Transforms/TestGpuMemoryPromotion.cpp
new file mode 100644
index 0000000000000..ee0291827fa45
--- /dev/null
+++ b/mlir/test/lib/Transforms/TestGpuMemoryPromotion.cpp
@@ -0,0 +1,40 @@
+//===- TestGpuMemoryPromotion.cpp - Test pass for GPU promotion ----------===//
+//
+// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a test pass for the utilities that move data across
+// the levels of the GPU memory hierarchy.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/GPU/GPUDialect.h"
+#include "mlir/Dialect/GPU/MemoryPromotion.h"
+#include "mlir/IR/Attributes.h"
+#include "mlir/Pass/Pass.h"
+
+using namespace mlir;
+
+namespace {
+/// Simple pass for testing the promotion to workgroup memory in GPU functions.
+/// Promotes all arguments with the "gpu.test_promote_workgroup" attribute.
+/// This does not check whether the promotion is legal (e.g., the amount of
+/// memory used) or beneficial (e.g., makes uncoalesced loads coalesced).
+class TestGpuMemoryPromotionPass
+    : public OperationPass<TestGpuMemoryPromotionPass, gpu::GPUFuncOp> {
+  void runOnOperation() override {
+    gpu::GPUFuncOp op = getOperation();
+    for (unsigned i = 0, e = op.getNumArguments(); i < e; ++i) {
+      if (op.getArgAttrOfType<UnitAttr>(i, "gpu.test_promote_workgroup"))
+        promoteToWorkgroupMemory(op, i);
+    }
+  }
+};
+} // end namespace
+
+static PassRegistration<TestGpuMemoryPromotionPass> registration(
+    "test-gpu-memory-promotion",
+    "Promotes the annotated arguments of gpu.func to workgroup memory.");
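
Editorial note: for readers unfamiliar with the promotion utility exercised above, the sketch below summarizes the before/after shape that the promotion.mlir tests verify. It is illustrative only, not part of the patch: the kernel name, shapes, and the "use" op are hypothetical, and the copy-loop bounds elided in the comments are produced by mapLoopToProcessorIds.

// Hypothetical input: one kernel argument annotated for promotion.
module @foo attributes {gpu.kernel_module} {
  gpu.func @kernel(%arg0: memref<5x4xf32> {gpu.test_promote_workgroup}) kernel {
    "use"(%arg0) : (memref<5x4xf32>) -> ()
    gpu.return
  }
}

// After -test-gpu-memory-promotion, schematically: the function gains a
// workgroup attribution in address space 3, the use is redirected to it, and
// thread-distributed copy-in/copy-out loop nests bracket the original body.
module @foo attributes {gpu.kernel_module} {
  gpu.func @kernel(%arg0: memref<5x4xf32>)
      workgroup(%promoted: memref<5x4xf32, 3>) kernel {
    // loop.for nest copying %arg0 into %promoted, with bounds derived from
    // gpu.thread_id/gpu.block_dim via mapLoopToProcessorIds ...
    "use"(%promoted) : (memref<5x4xf32, 3>) -> ()
    // ... symmetric loop.for nest copying %promoted back into %arg0.
    gpu.return
  }
}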