diff --git a/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp index 5524c4b484be1..67d0931003c54 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UnsafeFunctionsCheck.cpp @@ -301,14 +301,20 @@ void UnsafeFunctionsCheck::check(const MatchFinder::MatchResult &Result) { if (Custom) { for (const auto &Entry : CustomFunctions) { if (Entry.Pattern.match(*FuncDecl)) { - const StringRef Reason = + StringRef Reason = Entry.Reason.empty() ? "is marked as unsafe" : Entry.Reason.c_str(); - if (Entry.Replacement.empty()) { + // Omit the replacement when a fully custom reason is given. + if (Reason.consume_front(">")) { + diag(SourceExpr->getExprLoc(), "function %0 %1") + << FuncDecl << Reason.trim() << SourceExpr->getSourceRange(); + // Do not recommend a replacement when none is present. + } else if (Entry.Replacement.empty()) { diag(SourceExpr->getExprLoc(), "function %0 %1; it should not be used") << FuncDecl << Reason << Entry.Replacement << SourceExpr->getSourceRange(); + // Otherwise, emit the replacement. } else { diag(SourceExpr->getExprLoc(), "function %0 %1; '%2' should be used instead")
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index b982216297919..743397e3ec6ce 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -69,6 +69,13 @@ Potentially Breaking Changes - `CharTypdefsToIgnore` to `CharTypedefsToIgnore` in :doc:`bugprone-signed-char-misuse <clang-tidy/checks/bugprone/signed-char-misuse>` + +- Modified the custom message format of :doc:`bugprone-unsafe-functions <clang-tidy/checks/bugprone/unsafe-functions>` by assigning a special meaning to the character ``>`` at the start of the ``reason`` field of an entry in the option ``CustomFunctions``. If the reason starts with ``>``, then the replacement suggestion part of the message (which would be included by default) is omitted. (This does not change the warning locations.) - :program:`clang-tidy` now displays warnings from all non-system headers by default. Previously, users had to explicitly opt-in to header warnings using @@ -387,6 +394,11 @@ Changes in existing checks ` check by adding an additional matcher that generalizes the copy-and-swap idiom pattern detection. + +- Improved :doc:`bugprone-unsafe-functions <clang-tidy/checks/bugprone/unsafe-functions>` check by omitting the default suffix when the reason starts with the character ``>`` in the ``CustomFunctions`` option. - Improved :doc:`cppcoreguidelines-avoid-non-const-global-variables <clang-tidy/checks/cppcoreguidelines/avoid-non-const-global-variables>` check
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst index f1fec13739271..cb7ea415c54b2 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst @@ -96,37 +96,62 @@ to be checked. The format is the following, without newlines: The functions are matched using POSIX extended regular expressions. *(Note: The regular expressions do not support negative* ``(?!)`` *matches.)* -The `reason` is optional and is used to provide additional information -about the reasoning behind the replacement. The default reason is -`is marked as unsafe`. +The ``reason`` is optional and is used to provide additional information about the +reasoning behind the replacement. The default reason is ``is marked as unsafe``.
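As an illustration of the default reason (the function names here are hypothetical, not part of the check's built-in list), an entry that specifies only a pattern and a replacement produces the default message:

.. code:: c

  // bugprone-unsafe-functions.CustomFunctions:
  //   ^do_query$,do_query_checked
  do_query(); // warning: function 'do_query' is marked as unsafe; 'do_query_checked' should be used instead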
-If `replacement` is empty, the text `it should not be used` will be shown -instead of the suggestion for a replacement. +If ``replacement`` is empty, the default text ``it should not be used`` will be +shown instead of the suggestion for a replacement. -As an example, the configuration `^original$, replacement, is deprecated;` -will produce the following diagnostic message. +If the ``reason`` starts with the character ``>``, the reason becomes fully custom. +The default suffix is disabled even if a ``replacement`` is present, and only the +reason message is shown after the matched function, to allow better control over +the suggestions. (The starting ``>`` and whitespace directly after it are +trimmed from the message.) + +As an example, the following configuration matches only the function ``original`` +in the default namespace. A similar diagnostic can also be printed using a fully +custom reason. .. code:: c + // bugprone-unsafe-functions.CustomFunctions: + // ^original$, replacement, is deprecated; + // Using the fully custom message syntax: + // ^suspicious$,,> should be avoided if possible. original(); // warning: function 'original' is deprecated; 'replacement' should be used instead. + suspicious(); // warning: function 'suspicious' should be avoided if possible. ::std::original(); // no-warning original_function(); // no-warning -If the regular expression contains the character `:`, it is matched against the -qualified name (i.e. ``std::original``), otherwise the regex is matched against the unqualified name (``original``). -If the regular expression starts with `::` (or `^::`), it is matched against the -fully qualified name (``::std::original``). +If the regular expression contains the character ``:``, it is matched against the +qualified name (i.e. ``std::original``), otherwise the regex is matched against +the unqualified name (``original``). If the regular expression starts with ``::`` +(or ``^::``), it is matched against the fully qualified name +(``::std::original``). + +One of the use cases for fully custom messages is suggesting compiler options +and warning flags: + +.. code:: c + + // bugprone-unsafe-functions.CustomFunctions: + // ^memcpy$,,>is recommended to have compiler hardening using '_FORTIFY_SOURCE'; + // ^printf$,,>is recommended to have the '-Werror=format-security' compiler warning flag; + + memcpy(dest, src, 999'999); // warning: function 'memcpy' is recommended to have compiler hardening using '_FORTIFY_SOURCE' + printf(raw_str); // warning: function 'printf' is recommended to have the '-Werror=format-security' compiler warning flag .. note:: - Fully qualified names can contain template parameters on certain C++ classes, but not on C++ functions. - Type aliases are resolved before matching. + Fully qualified names can contain template parameters on certain C++ classes, + but not on C++ functions. Type aliases are resolved before matching. As an example, the member function ``open`` in the class ``std::ifstream`` has a fully qualified name of ``::std::basic_ifstream::open``. - The example could also be matched with the regex ``::std::basic_ifstream<[^>]*>::open``, which matches all potential - template parameters, but does not match nested template classes. + The example could also be matched with the regex + ``::std::basic_ifstream<[^>]*>::open``, which matches all potential template + parameters, but does not match nested template classes. 
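To make the precedence explicit: a fully custom reason (leading ``>``) suppresses the replacement suffix even when a replacement is configured. A short sketch with hypothetical names:

.. code:: c

  // bugprone-unsafe-functions.CustomFunctions:
  //   ^legacy_call$,modern_call,>is scheduled for removal
  legacy_call(); // warning: function 'legacy_call' is scheduled for removal

Here ``modern_call`` is configured as the replacement but is not mentioned in the warning, because the ``>`` prefix disables the default suffix.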
Options ------- diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c index 7fd71ec2f2e7b..7eaf015f06aa2 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unsafe-functions-custom.c @@ -1,5 +1,5 @@ // RUN: %check_clang_tidy -check-suffix=NON-STRICT-REGEX %s bugprone-unsafe-functions %t --\ -// RUN: -config="{CheckOptions: {bugprone-unsafe-functions.CustomFunctions: '::name_match,replacement,is a qualname match;^::prefix_match,,is matched on qualname prefix'}}" +// RUN: -config="{CheckOptions: {bugprone-unsafe-functions.CustomFunctions: \"::name_match,,>is a qualname match, but with a fully 'custom' message;^::prefix_match,,is matched on qualname prefix\"}}" // RUN: %check_clang_tidy -check-suffix=STRICT-REGEX %s bugprone-unsafe-functions %t --\ // RUN: -config="{CheckOptions: {bugprone-unsafe-functions.CustomFunctions: '^name_match$,replacement,is matched on function name only;^::prefix_match$,,is a full qualname match'}}" @@ -11,14 +11,14 @@ void prefix_match_regex(); void f1() { name_match(); - // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match' is a qualname match; 'replacement' should be used instead + // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match' is a qualname match, but with a fully 'custom' message // CHECK-MESSAGES-STRICT-REGEX: :[[@LINE-2]]:3: warning: function 'name_match' is matched on function name only; 'replacement' should be used instead prefix_match(); // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'prefix_match' is matched on qualname prefix; it should not be used // CHECK-MESSAGES-STRICT-REGEX: :[[@LINE-2]]:3: warning: function 'prefix_match' is a full qualname match; it should not be used name_match_regex(); - // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match_regex' is a qualname match; 'replacement' should be used instead + // CHECK-MESSAGES-NON-STRICT-REGEX: :[[@LINE-1]]:3: warning: function 'name_match_regex' is a qualname match, but with a fully 'custom' message // no-warning STRICT-REGEX prefix_match_regex(); diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 0a79b5ca73472..f2d8eb43b15a7 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -743,6 +743,7 @@ Bug Fixes to Attribute Support - Fixes crashes or missing diagnostics with the `device_kernel` attribute. (#GH161905) - Fix handling of parameter indexes when an attribute is applied to a C++23 explicit object member function. - Fixed several false positives and false negatives in function effect (`nonblocking`) analysis. (#GH166078) (#GH166101) (#GH166110) +- Fix ``cleanup`` attribute by delaying type checks until after the type is deduced. (#GH129631) Bug Fixes to C++ Support ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 8dfe4bc08c48e..0097476bc0d8d 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -741,6 +741,17 @@ class Attr { // our existing general parsing we need to have a separate flag that // opts an attribute into strict parsing of attribute parameters bit StrictEnumParameters = 0; + // Set to true for attributes which have Sema checks which requires the type + // to be deduced. 
+ // When `IsTypeDependent` is set to true, you should add an `ActOn*Attr` + // function to `Sema.h`. The signature of the function must be: + // `void ActOn*Attr(Decl *, const Attr *);` where the `Decl *` is the + // declaration the attribute will be attached to; its type will have already + // been deduced, and the `Attr *` is the attribute being applied to that + // declaration. This function should handle all type-sensitive semantics for + // the attribute. This function will be automatically called by + // `Sema::CheckAttributesOnDeducedType()`. + bit IsTypeDependent = 0; // Lists language options, one of which is required to be true for the // attribute to be applicable. If empty, no language options are required. list LangOpts = []; @@ -1400,6 +1411,7 @@ def Cleanup : InheritableAttr { let Args = [DeclArgument]; let Subjects = SubjectList<[LocalVar]>; let Documentation = [CleanupDocs]; + let IsTypeDependent = 1; // FIXME: DeclArgument should be reworked to also store the // Expr instead of adding attr specific hacks like the following. // See the discussion in https://github.com/llvm/llvm-project/pull/14023. diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index dbf857afa08c8..47da17e5cfe83 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -5253,6 +5253,18 @@ def HLSLF16ToF32 : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLDdxCoarse : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_ddx_coarse"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + +def HLSLDdyCoarse : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_ddy_coarse"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + // Builtins for XRay. def XRayCustomEvent : Builtin { let Spellings = ["__xray_customevent"]; diff --git a/clang/include/clang/Sema/CMakeLists.txt b/clang/include/clang/Sema/CMakeLists.txt index 9077e22c2307c..3f540ea596871 100644 --- a/clang/include/clang/Sema/CMakeLists.txt +++ b/clang/include/clang/Sema/CMakeLists.txt @@ -8,6 +8,11 @@ clang_tablegen(AttrParsedAttrKinds.inc -gen-clang-attr-parsed-attr-kinds SOURCE ../Basic/Attr.td TARGET ClangAttrParsedAttrKinds) +clang_tablegen(AttrIsTypeDependent.inc -gen-clang-attr-is-type-dependent + -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ + SOURCE ../Basic/Attr.td + TARGET ClangAttrIsTypeDependent) + clang_tablegen(AttrSpellingListIndex.inc -gen-clang-attr-spelling-index -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ SOURCE ../Basic/Attr.td diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 6ca182338d6af..fd2a2469142e4 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -4456,6 +4456,10 @@ class Sema final : public SemaBase { NamedDecl *New, Decl *Old, AvailabilityMergeKind AMK = AvailabilityMergeKind::Redeclaration); + /// CheckAttributesOnDeducedType - Calls Sema functions for attributes that + /// requires the type to be deduced. + void CheckAttributesOnDeducedType(Decl *D); + /// MergeTypedefNameDecl - We just parsed a typedef 'New' which has the /// same name and scope as a previous declaration 'Old'. Figure out /// how to resolve this situation, merging decls or emitting @@ -4760,6 +4764,8 @@ class Sema final : public SemaBase { // linkage or not. 
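For reference, the ``AttrIsTypeDependent.inc`` file produced by the new ``-gen-clang-attr-is-type-dependent`` TableGen backend (see ``EmitClangAttrIsTypeDependent`` later in this patch) and included into ``Sema`` just below should expand to roughly the following sketch, with one ``case`` per attribute that sets ``IsTypeDependent`` (currently only ``Cleanup``):

void checkAttrIsTypeDependent(Decl *D, const Attr *A) {
  switch (A->getKind()) {
  default:
    break;
  case attr::Cleanup:
    ActOnCleanupAttr(D, A);
    break;
  }
}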
static bool mightHaveNonExternalLinkage(const DeclaratorDecl *FD); +#include "clang/Sema/AttrIsTypeDependent.inc" + ///@} // @@ -15469,6 +15475,8 @@ class Sema final : public SemaBase { std::optional ActOnEffectExpression(Expr *CondExpr, StringRef AttributeName); + void ActOnCleanupAttr(Decl *D, const Attr *A); + private: /// The implementation of RequireCompleteType bool RequireCompleteTypeImpl(SourceLocation Loc, QualType T, diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index b6928ce7d9c44..12d9a98915ce3 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -924,6 +924,24 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, return EmitRuntimeCall( Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); } + case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("ddx_coarse operand must have a float representation"); + Intrinsic::ID ID = CGM.getHLSLRuntime().getDdxCoarseIntrinsic(); + return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID, + ArrayRef{Op0}, nullptr, + "hlsl.ddx.coarse"); + } + case Builtin::BI__builtin_hlsl_elementwise_ddy_coarse: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("ddy_coarse operand must have a float representation"); + Intrinsic::ID ID = CGM.getHLSLRuntime().getDdyCoarseIntrinsic(); + return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID, + ArrayRef{Op0}, nullptr, + "hlsl.ddy.coarse"); + } case Builtin::BI__builtin_get_spirv_spec_constant_bool: case Builtin::BI__builtin_get_spirv_spec_constant_short: case Builtin::BI__builtin_get_spirv_spec_constant_ushort: diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 48935584f28a2..e1200c62eccf1 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -163,6 +163,8 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync, group_memory_barrier_with_group_sync) GENERATE_HLSL_INTRINSIC_FUNCTION(GetDimensionsX, resource_getdimensions_x) + GENERATE_HLSL_INTRINSIC_FUNCTION(DdxCoarse, ddx_coarse) + GENERATE_HLSL_INTRINSIC_FUNCTION(DdyCoarse, ddy_coarse) //===----------------------------------------------------------------------===// // End of reserved area for HLSL intrinsic getters. diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index e30e56d66c58b..29c9e2125b653 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -72,6 +72,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" @@ -105,6 +106,7 @@ #include #include #include +#include #include #if LLVM_ON_UNIX #include // getpid @@ -2060,12 +2062,17 @@ void Driver::generateCompilationDiagnostics( InputList Inputs; BuildInputs(C.getDefaultToolChain(), C.getArgs(), Inputs); + ArgStringList IRInputs; for (InputList::iterator it = Inputs.begin(), ie = Inputs.end(); it != ie;) { bool IgnoreInput = false; - // Ignore input from stdin or any inputs that cannot be preprocessed. - // Check type first as not all linker inputs have a value. 
- if (types::getPreprocessedType(it->first) == types::TY_INVALID) { + // Save IR inputs separately, ignore input from stdin or any other inputs + // that cannot be preprocessed. Check type first as not all linker inputs + // have a value. + if (types::isLLVMIR(it->first)) { + IRInputs.push_back(it->second->getValue()); + IgnoreInput = true; + } else if (types::getPreprocessedType(it->first) == types::TY_INVALID) { IgnoreInput = true; } else if (!strcmp(it->second->getValue(), "-")) { Diag(clang::diag::note_drv_command_failed_diag_msg) @@ -2082,7 +2089,7 @@ void Driver::generateCompilationDiagnostics( } } - if (Inputs.empty()) { + if (Inputs.empty() && IRInputs.empty()) { Diag(clang::diag::note_drv_command_failed_diag_msg) << "Error generating preprocessed source(s) - " "no preprocessable inputs."; @@ -2105,46 +2112,82 @@ void Driver::generateCompilationDiagnostics( return; } - // Construct the list of abstract actions to perform for this compilation. On - // Darwin OSes this uses the driver-driver and builds universal actions. - const ToolChain &TC = C.getDefaultToolChain(); - if (TC.getTriple().isOSBinFormatMachO()) - BuildUniversalActions(C, TC, Inputs); - else - BuildActions(C, C.getArgs(), Inputs, C.getActions()); + // If we only have IR inputs there's no need for preprocessing. + if (!Inputs.empty()) { + // Construct the list of abstract actions to perform for this compilation. + // On Darwin OSes this uses the driver-driver and builds universal actions. + const ToolChain &TC = C.getDefaultToolChain(); + if (TC.getTriple().isOSBinFormatMachO()) + BuildUniversalActions(C, TC, Inputs); + else + BuildActions(C, C.getArgs(), Inputs, C.getActions()); - BuildJobs(C); + BuildJobs(C); - // If there were errors building the compilation, quit now. - if (Trap.hasErrorOccurred()) { - Diag(clang::diag::note_drv_command_failed_diag_msg) - << "Error generating preprocessed source(s)."; - return; - } + // If there were errors building the compilation, quit now. + if (Trap.hasErrorOccurred()) { + Diag(clang::diag::note_drv_command_failed_diag_msg) + << "Error generating preprocessed source(s)."; + return; + } + // Generate preprocessed output. + SmallVector, 4> FailingCommands; + C.ExecuteJobs(C.getJobs(), FailingCommands); - // Generate preprocessed output. - SmallVector, 4> FailingCommands; - C.ExecuteJobs(C.getJobs(), FailingCommands); + // If any of the preprocessing commands failed, clean up and exit. + if (!FailingCommands.empty()) { + Diag(clang::diag::note_drv_command_failed_diag_msg) + << "Error generating preprocessed source(s)."; + return; + } - // If any of the preprocessing commands failed, clean up and exit. - if (!FailingCommands.empty()) { - Diag(clang::diag::note_drv_command_failed_diag_msg) - << "Error generating preprocessed source(s)."; - return; + const ArgStringList &TempFiles = C.getTempFiles(); + if (TempFiles.empty()) { + Diag(clang::diag::note_drv_command_failed_diag_msg) + << "Error generating preprocessed source(s)."; + return; + } } - const ArgStringList &TempFiles = C.getTempFiles(); - if (TempFiles.empty()) { - Diag(clang::diag::note_drv_command_failed_diag_msg) - << "Error generating preprocessed source(s)."; - return; + // Copying filenames due to ownership. + const ArgStringList &Files = C.getTempFiles(); + SmallVector TempFiles(Files.begin(), Files.end()); + + // We'd like to copy the IR input file into our own temp file + // because the build system might try to clean-up after itself. 
+ for (auto const *Input : IRInputs) { + int FD; + llvm::SmallVector Path; + + StringRef extension = llvm::sys::path::extension(Input); + if (!extension.empty()) + extension = extension.drop_front(); + + std::error_code EC = llvm::sys::fs::createTemporaryFile( + llvm::sys::path::stem(Input), extension, FD, Path); + if (EC) { + Diag(clang::diag::note_drv_command_failed_diag_msg) + << "Error generating run script: " << "Failed copying IR input files" + << " " << EC.message(); + return; + } + + EC = llvm::sys::fs::copy_file(Input, FD); + if (EC) { + Diag(clang::diag::note_drv_command_failed_diag_msg) + << "Error generating run script: " << "Failed copying IR input files" + << " " << EC.message(); + return; + } + + TempFiles.push_back(std::string(Path.begin(), Path.end())); } Diag(clang::diag::note_drv_command_failed_diag_msg) << BugReporMsg; SmallString<128> VFS; SmallString<128> ReproCrashFilename; - for (const char *TempFile : TempFiles) { + for (std::string &TempFile : TempFiles) { Diag(clang::diag::note_drv_command_failed_diag_msg) << TempFile; if (Report) Report->TemporaryFiles.push_back(TempFile); @@ -2161,7 +2204,7 @@ void Driver::generateCompilationDiagnostics( } for (const char *TempFile : SavedTemps) - C.addTempFile(TempFile); + TempFiles.push_back(TempFile); // Assume associated files are based off of the first temporary file. CrashReportInfo CrashInfo(TempFiles[0], VFS); diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 5ee53c01d6e30..9871cf4449ec6 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1646,6 +1646,8 @@ SanitizerMask ToolChain::getSupportedSanitizers() const { Res |= SanitizerKind::ShadowCallStack; if (getTriple().isAArch64(64)) Res |= SanitizerKind::MemTag; + if (getTriple().isBPF()) + Res |= SanitizerKind::KernelAddress; return Res; } diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h index 2e2703de18cb1..38b95ee90736a 100644 --- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h @@ -2946,5 +2946,73 @@ float4 radians(float4); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_group_memory_barrier_with_group_sync) __attribute__((convergent)) void GroupMemoryBarrierWithGroupSync(void); +//===----------------------------------------------------------------------===// +// ddx_coarse builtin +//===----------------------------------------------------------------------===// + +/// \fn T ddx_coarse(T value) +/// \brief Computes a low precision partial derivative with respect to the +/// screen-space x-coordinate. +/// \param value The input value. +/// +/// The return value is a floating point scalar or vector containing the low +/// prevision partial derivative of the input value. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +half ddx_coarse(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +half2 ddx_coarse(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +half3 ddx_coarse(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +half4 ddx_coarse(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +float ddx_coarse(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +float2 ddx_coarse(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +float3 ddx_coarse(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +float4 ddx_coarse(float4); + +//===----------------------------------------------------------------------===// +// ddy_coarse builtin +//===----------------------------------------------------------------------===// + +/// \fn T ddy_coarse(T value) +/// \brief Computes a low precision partial derivative with respect to the +/// screen-space y-coordinate. +/// \param value The input value. +/// +/// The return value is a floating point scalar or vector containing the low +/// prevision partial derivative of the input value. + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +half ddy_coarse(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +half2 ddy_coarse(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +half3 ddy_coarse(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +half4 ddy_coarse(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +float ddy_coarse(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +float2 ddy_coarse(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +float3 ddy_coarse(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +float4 ddy_coarse(float4); + } // namespace hlsl #endif //_HLSL_HLSL_ALIAS_INTRINSICS_H_ diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 0ab408cd060c8..4b4ce13806873 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -3355,6 +3355,11 @@ void Sema::mergeDeclAttributes(NamedDecl *New, Decl *Old, if (!foundAny) New->dropAttrs(); } +void Sema::CheckAttributesOnDeducedType(Decl *D) { + for (const Attr *A : D->attrs()) + checkAttrIsTypeDependent(D, A); +} + // Returns the number of added attributes. template static unsigned propagateAttribute(ParmVarDecl *To, const ParmVarDecl *From, @@ -13829,6 +13834,8 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) { return; } + this->CheckAttributesOnDeducedType(RealDecl); + // dllimport cannot be used on variable definitions. 
if (VDecl->hasAttr() && !VDecl->isStaticDataMember()) { Diag(VDecl->getLocation(), diag::err_attribute_dllimport_data_definition); @@ -14327,6 +14334,8 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) { DeduceVariableDeclarationType(Var, false, nullptr)) return; + this->CheckAttributesOnDeducedType(RealDecl); + // C++11 [class.static.data]p3: A static data member can be declared with // the constexpr specifier; if so, its declaration shall specify // a brace-or-equal-initializer. diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index a9e7b44ac9d73..bda7aa32a9348 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -3511,16 +3511,6 @@ static void handleCleanupAttr(Sema &S, Decl *D, const ParsedAttr &AL) { return; } - // We're currently more strict than GCC about what function types we accept. - // If this ever proves to be a problem it should be easy to fix. - QualType Ty = S.Context.getPointerType(cast(D)->getType()); - QualType ParamTy = FD->getParamDecl(0)->getType(); - if (!S.IsAssignConvertCompatible(S.CheckAssignmentConstraints( - FD->getParamDecl(0)->getLocation(), ParamTy, Ty))) { - S.Diag(Loc, diag::err_attribute_cleanup_func_arg_incompatible_type) - << NI.getName() << ParamTy << Ty; - return; - } VarDecl *VD = cast(D); // Create a reference to the variable declaration. This is a fake/dummy // reference. @@ -8311,3 +8301,28 @@ void Sema::redelayDiagnostics(DelayedDiagnosticPool &pool) { assert(curPool && "re-emitting in undelayed context not supported"); curPool->steal(pool); } + +void Sema::ActOnCleanupAttr(Decl *D, const Attr *A) { + VarDecl *VD = cast(D); + if (VD->getType()->isDependentType()) + return; + + // Obtains the FunctionDecl that was found when handling the attribute + // earlier. + CleanupAttr *Attr = D->getAttr(); + FunctionDecl *FD = Attr->getFunctionDecl(); + DeclarationNameInfo NI = FD->getNameInfo(); + + // We're currently more strict than GCC about what function types we accept. + // If this ever proves to be a problem it should be easy to fix. 
+ QualType Ty = this->Context.getPointerType(VD->getType()); + QualType ParamTy = FD->getParamDecl(0)->getType(); + if (!this->IsAssignConvertCompatible(this->CheckAssignmentConstraints( + FD->getParamDecl(0)->getLocation(), ParamTy, Ty))) { + this->Diag(Attr->getArgLoc(), + diag::err_attribute_cleanup_func_arg_incompatible_type) + << NI.getName() << ParamTy << Ty; + D->dropAttr(); + return; + } +} diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 2b9b3abbd5360..5555916c2536f 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -3239,7 +3239,9 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case Builtin::BI__builtin_hlsl_elementwise_degrees: case Builtin::BI__builtin_hlsl_elementwise_radians: case Builtin::BI__builtin_hlsl_elementwise_rsqrt: - case Builtin::BI__builtin_hlsl_elementwise_frac: { + case Builtin::BI__builtin_hlsl_elementwise_frac: + case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse: + case Builtin::BI__builtin_hlsl_elementwise_ddy_coarse: { if (SemaRef.checkArgCount(TheCall, 1)) return true; if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall, diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 1b6b559c1227b..3a4b2ccc74350 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -1007,6 +1007,15 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, continue; } + if (auto *A = dyn_cast(TmplAttr)) { + if (!New->hasAttr()) { + auto *NewAttr = A->clone(Context); + NewAttr->setArgLoc(A->getArgLoc()); + New->addAttr(NewAttr); + } + continue; + } + assert(!TmplAttr->isPackExpansion()); if (TmplAttr->isLateParsed() && LateAttrs) { // Late parsed attributes must be instantiated and attached after the diff --git a/clang/test/CIR/CodeGen/call.c b/clang/test/CIR/CodeGen/call.c index d780e37f3d153..99ae4506b1f16 100644 --- a/clang/test/CIR/CodeGen/call.c +++ b/clang/test/CIR/CodeGen/call.c @@ -130,7 +130,7 @@ int f12(void) { // OGCG: %{{.+}} = call i32 @f10(i32 noundef 1) #[[ATTR0:.+]] // OGCG-NEXT: %{{.+}} = call i32 @f11(i32 noundef 2) #[[ATTR1:.+]] -// LLVM: attributes #[[ATTR0]] = { nounwind willreturn memory(read, errnomem: none) } +// LLVM: attributes #[[ATTR0]] = { nounwind willreturn memory(read, errnomem: none, target_mem0: none, target_mem1: none) } // LLVM: attributes #[[ATTR1]] = { nounwind willreturn memory(none) } // OGCG: attributes #[[ATTR0]] = { nounwind willreturn memory(read) } diff --git a/clang/test/CodeGenHLSL/builtins/ddx-coarse-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/ddx-coarse-builtin.hlsl new file mode 100644 index 0000000000000..01216eefadba2 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/ddx-coarse-builtin.hlsl @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-pc-vulkan-compute %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK-SPIRV + +// CHECK-LABEL: half @_Z19test_f16_ddx_coarseDh +// CHECK: %hlsl.ddx.coarse = call {{.*}} half @llvm.dx.ddx.coarse.f16(half %{{.*}}) +// CHECK: ret half %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: half @_Z19test_f16_ddx_coarseDh +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} 
half @llvm.spv.ddx.coarse.f16(half %{{.*}}) +// CHECK-SPIRV: ret half %hlsl.ddx.coarse +half test_f16_ddx_coarse(half val) { + return __builtin_hlsl_elementwise_ddx_coarse(val); +} + +// CHECK-LABEL: float @_Z19test_f32_ddx_coarsef +// CHECK: %hlsl.ddx.coarse = call {{.*}} float @llvm.dx.ddx.coarse.f32(float %{{.*}}) +// CHECK: ret float %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: float @_Z19test_f32_ddx_coarsef +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} float @llvm.spv.ddx.coarse.f32(float %{{.*}}) +// CHECK-SPIRV: ret float %hlsl.ddx.coarse +float test_f32_ddx_coarse(float val) { + return __builtin_hlsl_elementwise_ddx_coarse(val); +} diff --git a/clang/test/CodeGenHLSL/builtins/ddx-coarse.hlsl b/clang/test/CodeGenHLSL/builtins/ddx-coarse.hlsl new file mode 100644 index 0000000000000..c200d4715629e --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/ddx-coarse.hlsl @@ -0,0 +1,86 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-pc-vulkan-compute %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK-SPIRV + +// CHECK-LABEL: half @_Z19test_f16_ddx_coarseDh +// CHECK: %hlsl.ddx.coarse = call {{.*}} half @llvm.dx.ddx.coarse.f16(half %{{.*}}) +// CHECK: ret half %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: half @_Z19test_f16_ddx_coarseDh +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} half @llvm.spv.ddx.coarse.f16(half %{{.*}}) +// CHECK-SPIRV: ret half %hlsl.ddx.coarse +half test_f16_ddx_coarse(half val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: <2 x half> @_Z20test_f16_ddx_coarse2Dv2_Dh +// CHECK: %hlsl.ddx.coarse = call {{.*}} <2 x half> @llvm.dx.ddx.coarse.v2f16(<2 x half> %{{.*}}) +// CHECK: ret <2 x half> %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: <2 x half> @_Z20test_f16_ddx_coarse2Dv2_Dh +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <2 x half> @llvm.spv.ddx.coarse.v2f16(<2 x half> %{{.*}}) +// CHECK-SPIRV: ret <2 x half> %hlsl.ddx.coarse +half2 test_f16_ddx_coarse2(half2 val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: <3 x half> @_Z20test_f16_ddx_coarse3Dv3_Dh +// CHECK: %hlsl.ddx.coarse = call {{.*}} <3 x half> @llvm.dx.ddx.coarse.v3f16(<3 x half> %{{.*}}) +// CHECK: ret <3 x half> %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: <3 x half> @_Z20test_f16_ddx_coarse3Dv3_Dh +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <3 x half> @llvm.spv.ddx.coarse.v3f16(<3 x half> %{{.*}}) +// CHECK-SPIRV: ret <3 x half> %hlsl.ddx.coarse +half3 test_f16_ddx_coarse3(half3 val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: <4 x half> @_Z20test_f16_ddx_coarse4Dv4_Dh +// CHECK: %hlsl.ddx.coarse = call {{.*}} <4 x half> @llvm.dx.ddx.coarse.v4f16(<4 x half> %{{.*}}) +// CHECK: ret <4 x half> %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: <4 x half> @_Z20test_f16_ddx_coarse4Dv4_Dh +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <4 x half> @llvm.spv.ddx.coarse.v4f16(<4 x half> %{{.*}}) +// CHECK-SPIRV: ret <4 x half> %hlsl.ddx.coarse +half4 test_f16_ddx_coarse4(half4 val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: float @_Z19test_f32_ddx_coarsef +// CHECK: %hlsl.ddx.coarse = call {{.*}} float @llvm.dx.ddx.coarse.f32(float %{{.*}}) +// CHECK: ret float %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: float @_Z19test_f32_ddx_coarsef +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} float @llvm.spv.ddx.coarse.f32(float 
%{{.*}}) +// CHECK-SPIRV: ret float %hlsl.ddx.coarse +float test_f32_ddx_coarse(float val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: <2 x float> @_Z20test_f32_ddx_coarse2Dv2_f +// CHECK: %hlsl.ddx.coarse = call {{.*}} <2 x float> @llvm.dx.ddx.coarse.v2f32(<2 x float> %{{.*}}) +// CHECK: ret <2 x float> %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: <2 x float> @_Z20test_f32_ddx_coarse2Dv2_f +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <2 x float> @llvm.spv.ddx.coarse.v2f32(<2 x float> %{{.*}}) +// CHECK-SPIRV: ret <2 x float> %hlsl.ddx.coarse +float2 test_f32_ddx_coarse2(float2 val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: <3 x float> @_Z20test_f32_ddx_coarse3Dv3_f +// CHECK: %hlsl.ddx.coarse = call {{.*}} <3 x float> @llvm.dx.ddx.coarse.v3f32(<3 x float> %{{.*}}) +// CHECK: ret <3 x float> %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: <3 x float> @_Z20test_f32_ddx_coarse3Dv3_f +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <3 x float> @llvm.spv.ddx.coarse.v3f32(<3 x float> %{{.*}}) +// CHECK-SPIRV: ret <3 x float> %hlsl.ddx.coarse +float3 test_f32_ddx_coarse3(float3 val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: <4 x float> @_Z20test_f32_ddx_coarse4Dv4_f +// CHECK: %hlsl.ddx.coarse = call {{.*}} <4 x float> @llvm.dx.ddx.coarse.v4f32(<4 x float> %{{.*}}) +// CHECK: ret <4 x float> %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: <4 x float> @_Z20test_f32_ddx_coarse4Dv4_f +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <4 x float> @llvm.spv.ddx.coarse.v4f32(<4 x float> %{{.*}}) +// CHECK-SPIRV: ret <4 x float> %hlsl.ddx.coarse +float4 test_f32_ddx_coarse4(float4 val) { + return ddx_coarse(val); +} diff --git a/clang/test/CodeGenHLSL/builtins/ddy-coarse-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/ddy-coarse-builtin.hlsl new file mode 100644 index 0000000000000..2967deb75031f --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/ddy-coarse-builtin.hlsl @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-pc-vulkan-compute %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK-SPIRV + +// CHECK-LABEL: half @_Z19test_f16_ddy_coarseDh +// CHECK: %hlsl.ddy.coarse = call {{.*}} half @llvm.dx.ddy.coarse.f16(half %{{.*}}) +// CHECK: ret half %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: half @_Z19test_f16_ddy_coarseDh +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} half @llvm.spv.ddy.coarse.f16(half %{{.*}}) +// CHECK-SPIRV: ret half %hlsl.ddy.coarse +half test_f16_ddy_coarse(half val) { + return __builtin_hlsl_elementwise_ddy_coarse(val); +} + +// CHECK-LABEL: float @_Z19test_f32_ddy_coarsef +// CHECK: %hlsl.ddy.coarse = call {{.*}} float @llvm.dx.ddy.coarse.f32(float %{{.*}}) +// CHECK: ret float %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: float @_Z19test_f32_ddy_coarsef +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} float @llvm.spv.ddy.coarse.f32(float %{{.*}}) +// CHECK-SPIRV: ret float %hlsl.ddy.coarse +float test_f32_ddy_coarse(float val) { + return __builtin_hlsl_elementwise_ddy_coarse(val); +} diff --git a/clang/test/CodeGenHLSL/builtins/ddy-coarse.hlsl b/clang/test/CodeGenHLSL/builtins/ddy-coarse.hlsl new file mode 100644 index 0000000000000..faa972a1be326 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/ddy-coarse.hlsl @@ -0,0 +1,86 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl 
-triple dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-pc-vulkan-compute %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK-SPIRV + +// CHECK-LABEL: half @_Z19test_f16_ddy_coarseDh +// CHECK: %hlsl.ddy.coarse = call {{.*}} half @llvm.dx.ddy.coarse.f16(half %{{.*}}) +// CHECK: ret half %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: half @_Z19test_f16_ddy_coarseDh +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} half @llvm.spv.ddy.coarse.f16(half %{{.*}}) +// CHECK-SPIRV: ret half %hlsl.ddy.coarse +half test_f16_ddy_coarse(half val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: <2 x half> @_Z20test_f16_ddy_coarse2Dv2_Dh +// CHECK: %hlsl.ddy.coarse = call {{.*}} <2 x half> @llvm.dx.ddy.coarse.v2f16(<2 x half> %{{.*}}) +// CHECK: ret <2 x half> %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: <2 x half> @_Z20test_f16_ddy_coarse2Dv2_Dh +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <2 x half> @llvm.spv.ddy.coarse.v2f16(<2 x half> %{{.*}}) +// CHECK-SPIRV: ret <2 x half> %hlsl.ddy.coarse +half2 test_f16_ddy_coarse2(half2 val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: <3 x half> @_Z20test_f16_ddy_coarse3Dv3_Dh +// CHECK: %hlsl.ddy.coarse = call {{.*}} <3 x half> @llvm.dx.ddy.coarse.v3f16(<3 x half> %{{.*}}) +// CHECK: ret <3 x half> %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: <3 x half> @_Z20test_f16_ddy_coarse3Dv3_Dh +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <3 x half> @llvm.spv.ddy.coarse.v3f16(<3 x half> %{{.*}}) +// CHECK-SPIRV: ret <3 x half> %hlsl.ddy.coarse +half3 test_f16_ddy_coarse3(half3 val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: <4 x half> @_Z20test_f16_ddy_coarse4Dv4_Dh +// CHECK: %hlsl.ddy.coarse = call {{.*}} <4 x half> @llvm.dx.ddy.coarse.v4f16(<4 x half> %{{.*}}) +// CHECK: ret <4 x half> %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: <4 x half> @_Z20test_f16_ddy_coarse4Dv4_Dh +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <4 x half> @llvm.spv.ddy.coarse.v4f16(<4 x half> %{{.*}}) +// CHECK-SPIRV: ret <4 x half> %hlsl.ddy.coarse +half4 test_f16_ddy_coarse4(half4 val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: float @_Z19test_f32_ddy_coarsef +// CHECK: %hlsl.ddy.coarse = call {{.*}} float @llvm.dx.ddy.coarse.f32(float %{{.*}}) +// CHECK: ret float %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: float @_Z19test_f32_ddy_coarsef +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} float @llvm.spv.ddy.coarse.f32(float %{{.*}}) +// CHECK-SPIRV: ret float %hlsl.ddy.coarse +float test_f32_ddy_coarse(float val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: <2 x float> @_Z20test_f32_ddy_coarse2Dv2_f +// CHECK: %hlsl.ddy.coarse = call {{.*}} <2 x float> @llvm.dx.ddy.coarse.v2f32(<2 x float> %{{.*}}) +// CHECK: ret <2 x float> %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: <2 x float> @_Z20test_f32_ddy_coarse2Dv2_f +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <2 x float> @llvm.spv.ddy.coarse.v2f32(<2 x float> %{{.*}}) +// CHECK-SPIRV: ret <2 x float> %hlsl.ddy.coarse +float2 test_f32_ddy_coarse2(float2 val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: <3 x float> @_Z20test_f32_ddy_coarse3Dv3_f +// CHECK: %hlsl.ddy.coarse = call {{.*}} <3 x float> @llvm.dx.ddy.coarse.v3f32(<3 x float> %{{.*}}) +// CHECK: ret <3 x float> %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: <3 x float> @_Z20test_f32_ddy_coarse3Dv3_f +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <3 x 
float> @llvm.spv.ddy.coarse.v3f32(<3 x float> %{{.*}}) +// CHECK-SPIRV: ret <3 x float> %hlsl.ddy.coarse +float3 test_f32_ddy_coarse3(float3 val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: <4 x float> @_Z20test_f32_ddy_coarse4Dv4_f +// CHECK: %hlsl.ddy.coarse = call {{.*}} <4 x float> @llvm.dx.ddy.coarse.v4f32(<4 x float> %{{.*}}) +// CHECK: ret <4 x float> %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: <4 x float> @_Z20test_f32_ddy_coarse4Dv4_f +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <4 x float> @llvm.spv.ddy.coarse.v4f32(<4 x float> %{{.*}}) +// CHECK-SPIRV: ret <4 x float> %hlsl.ddy.coarse +float4 test_f32_ddy_coarse4(float4 val) { + return ddy_coarse(val); +} diff --git a/clang/test/Driver/crash-ir-repro.cpp b/clang/test/Driver/crash-ir-repro.cpp new file mode 100644 index 0000000000000..1f31a5ca1bb34 --- /dev/null +++ b/clang/test/Driver/crash-ir-repro.cpp @@ -0,0 +1,15 @@ +// RUN: %clang -S -emit-llvm -o %t.ll %s +// RUN: not %clang -S -DCRASH %s %t.ll 2>&1 | FileCheck %s + +// CHECK: Preprocessed source(s) and associated run script(s) are located at: +// CHECK-NEXT: clang: note: diagnostic msg: {{.*}}.cpp +// CHECK-NEXT: clang: note: diagnostic msg: {{.*}}.ll +// CHECK-NEXT: clang: note: diagnostic msg: {{.*}}.sh + +#ifdef CRASH +#pragma clang __debug parser_crash +#endif + +int main() { + return 0; +} diff --git a/clang/test/Driver/linker-wrapper-hip-amdgcnspirv.c b/clang/test/Driver/linker-wrapper-hip-amdgcnspirv.c new file mode 100644 index 0000000000000..429f7d3b9ee13 --- /dev/null +++ b/clang/test/Driver/linker-wrapper-hip-amdgcnspirv.c @@ -0,0 +1,16 @@ +// RUN: %clang -cc1 %s -triple "spirv64-amd-amdhsa" -emit-llvm-bc -o %t.bc +// RUN: llvm-offload-binary -o %t.out "--image=file=%t.bc,triple=spirv64-amd-amdhsa,arch=amdgcnspirv,kind=hip" +// RUN: clang-linker-wrapper \ +// RUN: "--should-extract=amdgcnspirv" \ +// RUN: "--host-triple=spirv64-amd-amdhsa" \ +// RUN: "--linker-path=clang-offload-bundler" \ +// RUN: "--emit-fatbin-only" \ +// RUN: "-o" "%t.hipfb" \ +// RUN: "%t.out" \ +// RUN: --dry-run \ +// RUN: 2>&1 | FileCheck %s + +// clang-linker-wrapper was previously calling clang-offload-bundler with -targets=...,hip-amdgcn-amd-amdhsa--amdgcnspirv +// This caused the runtime not to recognise the triple for the AMD SPIR-V code. 
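// In other words, for the 'amdgcnspirv' arch the bundler target entry emitted by
// clang-linker-wrapper changes (see the ClangLinkerWrapper.cpp hunk later in this patch):
//   before: hip-amdgcn-amd-amdhsa--amdgcnspirv
//   after:  hip-spirv64-amd-amdhsa--amdgcnspirv
// Other arches keep the hip-amdgcn-amd-amdhsa-- prefix.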
+ +// CHECK: {{".*clang-offload-bundler.*"}} {{.*}} -targets={{.*}},hip-spirv64-amd-amdhsa--amdgcnspirv diff --git a/clang/test/Sema/type-dependent-attrs.c b/clang/test/Sema/type-dependent-attrs.c new file mode 100644 index 0000000000000..13068b3f94ad4 --- /dev/null +++ b/clang/test/Sema/type-dependent-attrs.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -std=c23 -fsyntax-only -verify %s + +int open() { return 0; } +void close(typeof(open()) *) {} + +void cleanup_attr() { + int fd_int [[gnu::cleanup(close)]] = open(); + auto fd_auto [[gnu::cleanup(close)]] = open(); + float fd_invalid [[gnu::cleanup(close)]] = open(); // expected-error {{'cleanup' function 'close' parameter has type 'typeof (open()) *' (aka 'int *') which is incompatible with type 'float *'}} +} diff --git a/clang/test/SemaCXX/attr-cleanup.cpp b/clang/test/SemaCXX/attr-cleanup.cpp index 32d10683edebb..6048b4e92ec3f 100644 --- a/clang/test/SemaCXX/attr-cleanup.cpp +++ b/clang/test/SemaCXX/attr-cleanup.cpp @@ -27,3 +27,28 @@ namespace E { int v1 __attribute__((cleanup(c3))); // expected-error {{'c3' is not a single function}} } } + +namespace F { + int open() { return 0; } + void close(decltype(open()) *) {} + + void test1() { + auto fd [[gnu::cleanup(close)]] = open(); + } + + template + void test2() { + Ty fd [[gnu::cleanup(close)]] = open(); + } + + template + void test3() { + Ty fd [[gnu::cleanup(close)]] = open(); // #TEST3_CLEANUP + } + + int main() { + test2(); + test3(); // expected-error@#TEST3_CLEANUP {{'cleanup' function 'close' parameter has type 'decltype(open()) *' (aka 'int *') which is incompatible with type 'float *'}} \ + expected-note {{in instantiation of function template specialization 'F::test3' requested here}} + } +} diff --git a/clang/test/SemaHLSL/BuiltIns/ddx-coarse-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/ddx-coarse-errors.hlsl new file mode 100644 index 0000000000000..ebad1cc6826d8 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/ddx-coarse-errors.hlsl @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -verify +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-library %s -fnative-half-type -verify + +float no_arg() { + return __builtin_hlsl_elementwise_ddx_coarse(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} +} + +float too_many_args(float val) { + return __builtin_hlsl_elementwise_ddx_coarse(val, val); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} +} + +float test_integer_scalar_input(int val) { + return __builtin_hlsl_elementwise_ddx_coarse(val); + // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'int')}} +} + +double test_double_scalar_input(double val) { + return __builtin_hlsl_elementwise_ddx_coarse(val); + // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'double')}} +} diff --git a/clang/test/SemaHLSL/BuiltIns/ddy-coarse-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/ddy-coarse-errors.hlsl new file mode 100644 index 0000000000000..9cc23665882c8 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/ddy-coarse-errors.hlsl @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -verify +// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-library %s -fnative-half-type -verify + +float no_arg() { + return __builtin_hlsl_elementwise_ddy_coarse(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} +} + +float 
too_many_args(float val) { + return __builtin_hlsl_elementwise_ddy_coarse(val, val); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} +} + +float test_integer_scalar_input(int val) { + return __builtin_hlsl_elementwise_ddy_coarse(val); + // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'int')}} +} + +double test_double_scalar_input(double val) { + return __builtin_hlsl_elementwise_ddy_coarse(val); + // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'double')}} +} diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 1ef39d9f2d370..8f4a06526d24d 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -439,8 +439,11 @@ fatbinary(ArrayRef> InputFiles, Args.MakeArgString(Twine("-compression-level=") + Arg->getValue())); SmallVector Targets = {"-targets=host-x86_64-unknown-linux-gnu"}; - for (const auto &[File, Arch] : InputFiles) - Targets.push_back(Saver.save("hip-amdgcn-amd-amdhsa--" + Arch)); + for (const auto &[File, Arch] : InputFiles) { + Targets.push_back(Saver.save(Arch == "amdgcnspirv" + ? "hip-spirv64-amd-amdhsa--" + Arch + : "hip-amdgcn-amd-amdhsa--" + Arch)); + } CmdArgs.push_back(Saver.save(llvm::join(Targets, ","))); #ifdef _WIN32 diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp index e49dcb9b70b0f..bee9a01a3b01a 100644 --- a/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -5045,6 +5045,26 @@ void EmitClangAttrParsedAttrKinds(const RecordKeeper &Records, << "}\n"; } +// Emits Sema calls for type dependent attributes +void EmitClangAttrIsTypeDependent(const RecordKeeper &Records, + raw_ostream &OS) { + emitSourceFileHeader("Attribute is type dependent", OS, Records); + + OS << "void checkAttrIsTypeDependent(Decl *D, const Attr *A) {\n"; + OS << " switch (A->getKind()) {\n"; + OS << " default:\n"; + OS << " break;\n"; + for (const auto *A : Records.getAllDerivedDefinitions("Attr")) { + if (A->getValueAsBit("IsTypeDependent")) { + OS << " case attr::" << A->getName() << ":\n"; + OS << " ActOn" << A->getName() << "Attr(D, A);\n"; + OS << " break;\n"; + } + } + OS << " }\n"; + OS << "}\n"; +} + // Emits the code to dump an attribute. 
void EmitClangAttrTextNodeDump(const RecordKeeper &Records, raw_ostream &OS) { emitSourceFileHeader("Attribute text node dumper", OS, Records); diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp index 866040d503646..707ce617cb2d0 100644 --- a/clang/utils/TableGen/TableGen.cpp +++ b/clang/utils/TableGen/TableGen.cpp @@ -43,6 +43,7 @@ enum ActionType { GenClangAttrParsedAttrList, GenClangAttrParsedAttrImpl, GenClangAttrParsedAttrKinds, + GenClangAttrIsTypeDependent, GenClangAttrTextNodeDump, GenClangAttrNodeTraverse, GenClangBasicReader, @@ -179,6 +180,9 @@ cl::opt Action( clEnumValN(GenClangAttrParsedAttrKinds, "gen-clang-attr-parsed-attr-kinds", "Generate a clang parsed attribute kinds"), + clEnumValN(GenClangAttrIsTypeDependent, + "gen-clang-attr-is-type-dependent", + "Generate clang is type dependent attribute code"), clEnumValN(GenClangAttrTextNodeDump, "gen-clang-attr-text-node-dump", "Generate clang attribute text node dumper"), clEnumValN(GenClangAttrNodeTraverse, "gen-clang-attr-node-traverse", @@ -423,6 +427,9 @@ bool ClangTableGenMain(raw_ostream &OS, const RecordKeeper &Records) { case GenClangAttrParsedAttrKinds: EmitClangAttrParsedAttrKinds(Records, OS); break; + case GenClangAttrIsTypeDependent: + EmitClangAttrIsTypeDependent(Records, OS); + break; case GenClangAttrTextNodeDump: EmitClangAttrTextNodeDump(Records, OS); break; diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h index fa49dcd289bc2..058bda3ebd246 100644 --- a/clang/utils/TableGen/TableGenBackends.h +++ b/clang/utils/TableGen/TableGenBackends.h @@ -82,6 +82,8 @@ void EmitClangAttrParsedAttrImpl(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangAttrParsedAttrKinds(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitClangAttrIsTypeDependent(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); void EmitClangAttrTextNodeDump(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangAttrNodeTraverse(const llvm::RecordKeeper &Records, diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py index 3f7dd8e402b78..ea22fb0babc46 100644 --- a/compiler-rt/test/lit.common.cfg.py +++ b/compiler-rt/test/lit.common.cfg.py @@ -875,7 +875,7 @@ def is_windows_lto_supported(): config.substitutions.append( ( "%ld_flags_rpath_so" + postfix, - "-install_name @rpath/`basename %dynamiclib{}`".format(postfix), + "-install_name @rpath/%base_dynamiclib{}".format(postfix), ) ) elif config.target_os in ("FreeBSD", "NetBSD", "OpenBSD"): @@ -908,6 +908,9 @@ def is_windows_lto_supported(): config.substitutions.append( ("%dynamiclib" + postfix, "%t.dir/%xdynamiclib_filename" + postfix) ) + config.substitutions.append( + ("%base_dynamiclib" + postfix, "%xdynamiclib_filename" + postfix) + ) config.substitutions.append( ( "%xdynamiclib_filename" + postfix, diff --git a/compiler-rt/test/rtsan/Darwin/dlopen.cpp b/compiler-rt/test/rtsan/Darwin/dlopen.cpp index 1aabe5cb6e580..435a4353b7026 100644 --- a/compiler-rt/test/rtsan/Darwin/dlopen.cpp +++ b/compiler-rt/test/rtsan/Darwin/dlopen.cpp @@ -8,18 +8,19 @@ // RUN: %clangxx -fsanitize=realtime %s -o %t.so -shared -DSHARED_LIB // RUN: %clangxx %s -o %t -// RUN: RTSAN_DYLIB_PATH=`%clangxx -fsanitize=realtime %s -### 2>&1 \ +// RUN: %clangxx -fsanitize=realtime %s -### 2>&1 \ // RUN: | grep "libclang_rt.rtsan_osx_dynamic.dylib" \ -// RUN: | sed -e 's/.*"\(.*libclang_rt.rtsan_osx_dynamic.dylib\)".*/\1/'` +// RUN: | sed -e 
's/.*"\(.*libclang_rt.rtsan_osx_dynamic.dylib\)".*/\1/' \ +// RUN: | tr -d '\n' > %t.rtsan_dylib_path // Launching a non-instrumented binary that dlopen's an instrumented library should fail. // RUN: not %run %t %t.so 2>&1 | FileCheck %s --check-prefix=CHECK-FAIL // Launching a non-instrumented binary with an explicit DYLD_INSERT_LIBRARIES should work. -// RUN: DYLD_INSERT_LIBRARIES=$RTSAN_DYLIB_PATH %run %t %t.so 2>&1 | FileCheck %s +// RUN: env DYLD_INSERT_LIBRARIES="%{readfile:%t.rtsan_dylib_path}" %run %t %t.so 2>&1 | FileCheck %s // Launching an instrumented binary with the DYLD_INSERT_LIBRARIES env variable has no error // RUN: %clangxx -fsanitize=realtime %s -o %t -// RUN: DYLD_INSERT_LIBRARIES=$RTSAN_DYLIB_PATH %run %t %t.so 2>&1 | FileCheck %s --check-prefix=CHECK-INSTRUMENTED +// RUN: env DYLD_INSERT_LIBRARIES="%{readfile:%t.rtsan_dylib_path}" %run %t %t.so 2>&1 | FileCheck %s --check-prefix=CHECK-INSTRUMENTED #include #include diff --git a/compiler-rt/test/tsan/Darwin/dlopen.cpp b/compiler-rt/test/tsan/Darwin/dlopen.cpp index 3d12b815f9c25..2ab052f1c0c26 100644 --- a/compiler-rt/test/tsan/Darwin/dlopen.cpp +++ b/compiler-rt/test/tsan/Darwin/dlopen.cpp @@ -9,14 +9,15 @@ // RUN: %clangxx_tsan %s -o %t.so -shared -DSHARED_LIB // RUN: %clangxx_tsan -fno-sanitize=thread %s -o %t -// RUN: TSAN_DYLIB_PATH=`%clangxx_tsan %s -### 2>&1 \ +// RUN: %clangxx_tsan %s -### 2>&1 \ // RUN: | grep "libclang_rt.tsan_osx_dynamic.dylib" \ -// RUN: | sed -e 's/.*"\(.*libclang_rt.tsan_osx_dynamic.dylib\)".*/\1/'` +// RUN: | sed -e 's/.*"\(.*libclang_rt.tsan_osx_dynamic.dylib\)".*/\1/' \ +// RUN: | tr -d '\n' > %t.tsan_dylib_path // Launching a non-instrumented binary that dlopen's an instrumented library should fail. // RUN: not %run %t %t.so 2>&1 | FileCheck %s --check-prefix=CHECK-FAIL // Launching a non-instrumented binary with an explicit DYLD_INSERT_LIBRARIES should work. 
-// RUN: DYLD_INSERT_LIBRARIES=$TSAN_DYLIB_PATH %run %t %t.so 2>&1 | FileCheck %s +// RUN: env DYLD_INSERT_LIBRARIES="%{readfile:%t.tsan_dylib_path}" %run %t %t.so 2>&1 | FileCheck %s #include #include diff --git a/compiler-rt/test/tsan/Darwin/external-ignore-noninstrumented.cpp b/compiler-rt/test/tsan/Darwin/external-ignore-noninstrumented.cpp index 916b0b893fc0d..cfa46e0f0a213 100644 --- a/compiler-rt/test/tsan/Darwin/external-ignore-noninstrumented.cpp +++ b/compiler-rt/test/tsan/Darwin/external-ignore-noninstrumented.cpp @@ -1,8 +1,10 @@ +// RUN: basename %t-lib.dylib | tr -d '\n' > %t.basename // RUN: %clangxx_tsan -shared %p/external-lib.cpp -fno-sanitize=thread -DUSE_TSAN_CALLBACKS \ -// RUN: -o %t-lib.dylib -install_name @rpath/`basename %t-lib.dylib` +// RUN: -o %t-lib.dylib -install_name @rpath/%{readfile:%t.basename} +// RUN: basename %t-module.dylib | tr -d '\n' > %t.basename // RUN: %clangxx_tsan -shared %p/external-noninstrumented-module.cpp %t-lib.dylib -fno-sanitize=thread \ -// RUN: -o %t-module.dylib -install_name @rpath/`basename %t-module.dylib` +// RUN: -o %t-module.dylib -install_name @rpath/%{readfile:%t.basename} // RUN: %clangxx_tsan %s %t-module.dylib -o %t // RUN: %run %t 2>&1 | FileCheck %s diff --git a/compiler-rt/test/tsan/Darwin/external.cpp b/compiler-rt/test/tsan/Darwin/external.cpp index bf189eb1d6b5b..52fae36f0e1f4 100644 --- a/compiler-rt/test/tsan/Darwin/external.cpp +++ b/compiler-rt/test/tsan/Darwin/external.cpp @@ -1,14 +1,17 @@ +// RUN: basename %t-lib-instrumented.dylib | tr -d '\n' > %t.basename // RUN: %clangxx_tsan %p/external-lib.cpp -shared \ // RUN: -o %t-lib-instrumented.dylib \ -// RUN: -install_name @rpath/`basename %t-lib-instrumented.dylib` +// RUN: -install_name @rpath/%{readfile:%t.basename} +// RUN: basename %t-lib-noninstrumented.dylib | tr -d '\n' > %t.basename // RUN: %clangxx_tsan %p/external-lib.cpp -shared -fno-sanitize=thread \ // RUN: -o %t-lib-noninstrumented.dylib \ -// RUN: -install_name @rpath/`basename %t-lib-noninstrumented.dylib` +// RUN: -install_name @rpath/%{readfile:%t.basename} +// RUN: basename %t-lib-noninstrumented-callbacks.dylib | tr -d '\n' > %t.basename // RUN: %clangxx_tsan %p/external-lib.cpp -shared -fno-sanitize=thread -DUSE_TSAN_CALLBACKS \ // RUN: -o %t-lib-noninstrumented-callbacks.dylib \ -// RUN: -install_name @rpath/`basename %t-lib-noninstrumented-callbacks.dylib` +// RUN: -install_name @rpath/%{readfile:%t.basename} // RUN: %clangxx_tsan %s %t-lib-instrumented.dylib -o %t-lib-instrumented // RUN: %clangxx_tsan %s %t-lib-noninstrumented.dylib -o %t-lib-noninstrumented diff --git a/compiler-rt/test/tsan/Darwin/malloc-stack-logging.cpp b/compiler-rt/test/tsan/Darwin/malloc-stack-logging.cpp index 8d9c2122d0e6c..0a96e346f8012 100644 --- a/compiler-rt/test/tsan/Darwin/malloc-stack-logging.cpp +++ b/compiler-rt/test/tsan/Darwin/malloc-stack-logging.cpp @@ -4,7 +4,7 @@ // use syscalls directly) to make sure other interceptors aren't called. 
// RUN: %clangxx_tsan -O1 %s -o %t -// RUN: MallocStackLogging=1 %run %t 2>&1 | FileCheck %s +// RUN: env MallocStackLogging=1 %run %t 2>&1 | FileCheck %s #include #include #include diff --git a/libcxx/include/__config b/libcxx/include/__config index 8f461599ffd5b..d79ace0cbb896 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -546,6 +546,12 @@ typedef __char32_t char32_t; # define _LIBCPP_DEPRECATED_(m) # endif +# if defined(__DEPRECATED) && __DEPRECATED && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) +# define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 1 +# else +# define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 0 +# endif + # if !defined(_LIBCPP_CXX03_LANG) # define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED # else diff --git a/libcxx/include/ccomplex b/libcxx/include/ccomplex index ee7e088aac54d..c1cb039f83a5e 100644 --- a/libcxx/include/ccomplex +++ b/libcxx/include/ccomplex @@ -26,18 +26,10 @@ # pragma GCC system_header # endif -# if _LIBCPP_STD_VER >= 20 - -using __standard_header_ccomplex - _LIBCPP_DEPRECATED_("removed in C++20. Include instead.") _LIBCPP_NODEBUG = void; -using __use_standard_header_ccomplex _LIBCPP_NODEBUG = __standard_header_ccomplex; - -# elif _LIBCPP_STD_VER >= 17 - -using __standard_header_ccomplex _LIBCPP_DEPRECATED_("Include instead.") _LIBCPP_NODEBUG = void; -using __use_standard_header_ccomplex _LIBCPP_NODEBUG = __standard_header_ccomplex; - +# if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS +# warning is deprecated in C++17 and removed in C++20. Include instead. # endif + #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) #endif // _LIBCPP_CCOMPLEX diff --git a/libcxx/include/ciso646 b/libcxx/include/ciso646 index 34164362dc10d..d9eae41291024 100644 --- a/libcxx/include/ciso646 +++ b/libcxx/include/ciso646 @@ -24,13 +24,10 @@ # pragma GCC system_header # endif -# if _LIBCPP_STD_VER >= 20 - -using __standard_header_ciso646 - _LIBCPP_DEPRECATED_("removed in C++20. Include instead.") _LIBCPP_NODEBUG = void; -using __use_standard_header_ciso646 _LIBCPP_NODEBUG = __standard_header_ciso646; - +# if _LIBCPP_STD_VER >= 20 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS +# warning is removed in C++20. Include instead. # endif + #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) #endif // _LIBCPP_CISO646 diff --git a/libcxx/include/cstdalign b/libcxx/include/cstdalign index 7f8dd1e1fbaf8..7aa8cc81ad14c 100644 --- a/libcxx/include/cstdalign +++ b/libcxx/include/cstdalign @@ -43,17 +43,10 @@ Macros: # undef __alignof_is_defined # define __alignof_is_defined 1 -# if _LIBCPP_STD_VER >= 20 - -using __standard_header_cstdalign _LIBCPP_DEPRECATED_("removed in C++20.") _LIBCPP_NODEBUG = void; -using __use_standard_header_cstdalign _LIBCPP_NODEBUG = __standard_header_cstdalign; - -# elif _LIBCPP_STD_VER >= 17 - -using __standard_header_cstdalign _LIBCPP_DEPRECATED _LIBCPP_NODEBUG = void; -using __use_standard_header_cstdalign _LIBCPP_NODEBUG = __standard_header_cstdalign; - +# if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS +# warning is deprecated in C++17 and removed in C++20. 
# endif + #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) #endif // _LIBCPP_CSTDALIGN diff --git a/libcxx/include/cstdbool b/libcxx/include/cstdbool index a432d5f08b9ae..805a287bd7627 100644 --- a/libcxx/include/cstdbool +++ b/libcxx/include/cstdbool @@ -31,17 +31,10 @@ Macros: # undef __bool_true_false_are_defined # define __bool_true_false_are_defined 1 -# if _LIBCPP_STD_VER >= 20 - -using __standard_header_cstdbool _LIBCPP_DEPRECATED_("removed in C++20.") _LIBCPP_NODEBUG = void; -using __use_standard_header_cstdbool _LIBCPP_NODEBUG = __standard_header_cstdbool; - -# elif _LIBCPP_STD_VER >= 17 - -using __standard_header_cstdbool _LIBCPP_DEPRECATED _LIBCPP_NODEBUG = void; -using __use_standard_header_cstdbool _LIBCPP_NODEBUG = __standard_header_cstdbool; - +# if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS +# warning is deprecated in C++17 and removed in C++20. # endif + #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) #endif // _LIBCPP_CSTDBOOL diff --git a/libcxx/include/ctgmath b/libcxx/include/ctgmath index db0786f1e2c46..13b7a96e4d8fc 100644 --- a/libcxx/include/ctgmath +++ b/libcxx/include/ctgmath @@ -28,17 +28,8 @@ # pragma GCC system_header # endif -# if _LIBCPP_STD_VER >= 20 - -using __standard_header_ctgmath - _LIBCPP_DEPRECATED_("removed in C++20. Include and instead.") _LIBCPP_NODEBUG = void; -using __use_standard_header_ctgmath _LIBCPP_NODEBUG = __standard_header_ctgmath; - -# elif _LIBCPP_STD_VER >= 17 - -using __standard_header_ctgmath _LIBCPP_DEPRECATED_("Include and instead.") _LIBCPP_NODEBUG = void; -using __use_standard_header_ctgmath _LIBCPP_NODEBUG = __standard_header_ctgmath; - +# if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS +# warning is deprecated in C++17 and removed in C++20. Include and instead. # endif #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) diff --git a/libcxx/test/libcxx/transitive_includes.gen.py b/libcxx/test/libcxx/transitive_includes.gen.py index 6ed35af7e275e..2b643e1f2ad48 100644 --- a/libcxx/test/libcxx/transitive_includes.gen.py +++ b/libcxx/test/libcxx/transitive_includes.gen.py @@ -89,7 +89,7 @@ // UNSUPPORTED: LIBCXX-FREEBSD-FIXME // RUN: mkdir %t -// RUN: %{{cxx}} %s %{{flags}} %{{compile_flags}} --trace-includes -fshow-skipped-includes --preprocess > /dev/null 2> %t/trace-includes.txt +// RUN: %{{cxx}} %s %{{flags}} %{{compile_flags}} -Wno-deprecated --trace-includes -fshow-skipped-includes --preprocess > /dev/null 2> %t/trace-includes.txt // RUN: %{{python}} %{{libcxx-dir}}/test/libcxx/transitive_includes/to_csv.py %t/trace-includes.txt > %t/actual_transitive_includes.csv // RUN: cat %{{libcxx-dir}}/test/libcxx/transitive_includes/%{{cxx_std}}.csv | awk '/^{escaped_header} / {{ print }}' > %t/expected_transitive_includes.csv // RUN: diff -w %t/expected_transitive_includes.csv %t/actual_transitive_includes.csv diff --git a/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp index 0eaf82ce5cef0..8df89d0ba9206 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp @@ -14,12 +14,6 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: clang-modules-build -#include "test_macros.h" - #include -#if TEST_STD_VER >= 20 -// expected-warning@ccomplex:* {{'__standard_header_ccomplex' is deprecated: removed in C++20. 
Include instead.}} -#else -// expected-warning@ccomplex:* {{'__standard_header_ccomplex' is deprecated: Include instead.}} -#endif +// expected-warning@ccomplex:* {{ is deprecated in C++17 and removed in C++20. Include instead.}} diff --git a/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp index 04acd10081548..32b57033331c8 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp @@ -15,4 +15,5 @@ // UNSUPPORTED: clang-modules-build #include -// expected-warning@ciso646:* {{'__standard_header_ciso646' is deprecated: removed in C++20. Include instead.}} + +// expected-warning@ciso646:* {{ is removed in C++20. Include instead.}} diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp index dc9f1af55b3f1..23a7709a9d658 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp @@ -14,12 +14,6 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: clang-modules-build -#include "test_macros.h" - #include -#if TEST_STD_VER >= 20 -// expected-warning@cstdalign:* {{'__standard_header_cstdalign' is deprecated: removed in C++20.}} -#else -// expected-warning@cstdalign:* {{'__standard_header_cstdalign' is deprecated}} -#endif +// expected-warning@cstdalign:* {{ is deprecated in C++17 and removed in C++20.}} diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp index eddefe14d35ea..c2c0f03c52d3c 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp @@ -14,12 +14,6 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: clang-modules-build -#include "test_macros.h" - #include -#if TEST_STD_VER >= 20 -// expected-warning@cstdbool:* {{'__standard_header_cstdbool' is deprecated: removed in C++20.}} -#else -// expected-warning@cstdbool:* {{'__standard_header_cstdbool' is deprecated}} -#endif +// expected-warning@cstdbool:* {{ is deprecated in C++17 and removed in C++20.}} diff --git a/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp index 097ab1643d15a..4f5564915443d 100644 --- a/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp +++ b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp @@ -14,12 +14,6 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: clang-modules-build -#include "test_macros.h" - #include -#if TEST_STD_VER >= 20 -// expected-warning@ctgmath:* {{'__standard_header_ctgmath' is deprecated: removed in C++20. Include and instead.}} -#else -// expected-warning@ctgmath:* {{'__standard_header_ctgmath' is deprecated: Include and instead.}} -#endif +// expected-warning@ctgmath:* {{ is deprecated in C++17 and removed in C++20. 
Include and instead.}} diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp index 12d778408d5ec..e58e760a5ce81 100644 --- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp +++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.verify.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: std-at-least-c++23 +// REQUIRES: std-at-least-c++26 // @@ -21,11 +21,6 @@ void test() { // expected-error@*:* {{static assertion failed}} - // Turns to an error since C++26 (Disallow Binding a Returned Glvalue to a Temporary https://wg21.link/P2748R5). -#if TEST_STD_VER >= 26 // expected-error@tuple:* {{returning reference to local temporary object}} -#else - // expected-warning@tuple:* {{returning reference to local temporary object}} -#endif std::ignore = std::make_from_tuple(std::tuple{}); } diff --git a/libcxx/utils/libcxx/test/format.py b/libcxx/utils/libcxx/test/format.py index 975209c273f8c..76e9115295b99 100644 --- a/libcxx/utils/libcxx/test/format.py +++ b/libcxx/utils/libcxx/test/format.py @@ -99,7 +99,7 @@ def parseScript(test, preamble): substitutions.append( ( "%{verify}", - "%{cxx} %s %{flags} %{compile_flags} -fsyntax-only -Wno-error -Xclang -verify -Xclang -verify-ignore-unexpected=note -ferror-limit=0", + "%{cxx} %s %{flags} %{compile_flags} -U_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER -fsyntax-only -Wno-error -Xclang -verify -Xclang -verify-ignore-unexpected=note -ferror-limit=0", ) ) substitutions.append(("%{run}", "%{exec} %t.exe")) diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 2a97df4785ecb..b0dc797292511 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -762,7 +762,7 @@ void AArch64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, relocateNoSym(loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val); break; default: - llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); + llvm_unreachable("unsupported relocation for TLS GD to IE relaxation"); } } diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake index 9a2e73a1e3718..84c03cd6432ed 100644 --- a/llvm/cmake/modules/TableGen.cmake +++ b/llvm/cmake/modules/TableGen.cmake @@ -66,6 +66,16 @@ function(tablegen project ofn) list(APPEND LLVM_TABLEGEN_FLAGS "-omit-comments") endif() + set(EXTRA_OUTPUTS) + if("-gen-register-info" IN_LIST ARGN) + cmake_path(GET ofn STEM OUTPUT_BASENAME) + list(APPEND EXTRA_OUTPUTS + ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_BASENAME}Enums.inc + ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_BASENAME}Header.inc + ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_BASENAME}MCDesc.inc + ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_BASENAME}TargetDesc.inc) + endif() + # MSVC can't support long string literals ("long" > 65534 bytes)[1], so if there's # a possibility of generated tables being consumed by MSVC, generate arrays of # char literals, instead. 
If we're cross-compiling, then conservatively assume @@ -126,7 +136,7 @@ function(tablegen project ofn) set(LLVM_TABLEGEN_JOB_POOL "") endif() - add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn} + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn} ${EXTRA_OUTPUTS} COMMAND ${tablegen_exe} ${ARG_UNPARSED_ARGUMENTS} ${tblgen_includes} ${LLVM_TABLEGEN_FLAGS} diff --git a/llvm/docs/TableGen/BackEnds.rst b/llvm/docs/TableGen/BackEnds.rst index 7f571378860b2..1e3cb8783df16 100644 --- a/llvm/docs/TableGen/BackEnds.rst +++ b/llvm/docs/TableGen/BackEnds.rst @@ -355,6 +355,13 @@ ClangAttrParsedAttrKinds ``AttributeList::getKind`` function, mapping a string (and syntax) to a parsed attribute ``AttributeList::Kind`` enumeration. +ClangAttrIsTypeDependent +------------------------ + +**Purpose**: Creates ``AttrIsTypeDependent.inc``, which is used to implement the +``Sema::CheckAttributesOnDeducedType`` function, mapping an attribute kind to a +Sema function if it exists. + ClangAttrDump ------------- diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index cec7d09f494d6..4c932c523e423 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3492,6 +3492,13 @@ class LLVM_ABI TargetLoweringBase { return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT)); } + // Return true if the target wants to optimize the mul overflow intrinsic + // for the given \p VT. + virtual bool shouldOptimizeMulOverflowWithZeroHighBits(LLVMContext &Context, + EVT VT) const { + return false; + } + // Return true if it is profitable to use a scalar input to a BUILD_VECTOR // even if the vector itself has multiple uses. virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const { diff --git a/llvm/include/llvm/IR/Constant.h b/llvm/include/llvm/IR/Constant.h index 0be1fc172ebd4..e8ce453559ed7 100644 --- a/llvm/include/llvm/IR/Constant.h +++ b/llvm/include/llvm/IR/Constant.h @@ -79,6 +79,9 @@ class Constant : public User { /// Return true if the value is the smallest signed value. LLVM_ABI bool isMinSignedValue() const; + /// Return true if the value is the largest signed value. + LLVM_ABI bool isMaxSignedValue() const; + /// Return true if this is a finite and non-zero floating-point scalar /// constant or a fixed width vector constant with all finite and non-zero /// elements. 
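A minimal sketch (not part of the patch) of the intended semantics of the new Constant::isMaxSignedValue() helper declared above; it matches the implementation added later in this diff in llvm/lib/IR/Constants.cpp and assumes only an existing LLVMContext:

// Illustrative only; mirrors the new helper's behaviour for scalars and splats.
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

static bool demoIsMaxSignedValue(LLVMContext &Ctx) {
  Type *I8 = Type::getInt8Ty(Ctx);
  Constant *MaxI8 = ConstantInt::get(I8, 127);   // INT8_MAX
  Constant *FiveI8 = ConstantInt::get(I8, 5);
  // Splats of INT_MAX are recognised through getSplatValue().
  Constant *Splat = ConstantVector::getSplat(ElementCount::getFixed(4), MaxI8);
  return MaxI8->isMaxSignedValue() &&    // true
         !FiveI8->isMaxSignedValue() &&  // false
         Splat->isMaxSignedValue();      // true
}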
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index d7db935ee07f1..5a4cc776b26a5 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -170,6 +170,8 @@ def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0> [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>; def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>; +def int_dx_ddx_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; +def int_dx_ddy_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index f39c6cda2c579..2f7c25550a0cc 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -134,6 +134,8 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty] def int_spv_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>; def int_spv_discard : DefaultAttrsIntrinsic<[], [], []>; + def int_spv_ddx_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + def int_spv_ddy_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_spv_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_sclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_nclamp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; diff --git a/llvm/include/llvm/TableGen/Main.h b/llvm/include/llvm/TableGen/Main.h index bafce3a463acc..daede9f5a46f0 100644 --- a/llvm/include/llvm/TableGen/Main.h +++ b/llvm/include/llvm/TableGen/Main.h @@ -14,7 +14,6 @@ #define LLVM_TABLEGEN_MAIN_H #include "llvm/Support/CommandLine.h" -#include #include namespace llvm { @@ -30,18 +29,17 @@ struct TableGenOutputFiles { }; /// Returns true on error, false otherwise. -using TableGenMainFn = bool(raw_ostream &OS, const RecordKeeper &Records); +using TableGenMainFn = + function_ref; /// Perform the action using Records, and store output in OutFiles. /// Returns true on error, false otherwise. -using MultiFileTableGenMainFn = bool(TableGenOutputFiles &OutFiles, - const RecordKeeper &Records); +using MultiFileTableGenMainFn = function_ref; -int TableGenMain(const char *argv0, - std::function MainFn = nullptr); +int TableGenMain(const char *argv0, TableGenMainFn MainFn = nullptr); -int TableGenMain(const char *argv0, - std::function MainFn = nullptr); +int TableGenMain(const char *argv0, MultiFileTableGenMainFn MainFn = nullptr); /// Controls emitting large character arrays as strings or character arrays. /// Typically set to false when building with MSVC. 
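A minimal sketch (not from this patch) of how a typical *-tblgen driver calls the updated TableGenMain entry point; llvm::function_ref binds to free functions and lambdas alike, so existing drivers keep compiling unchanged. The backend EmitMyTables is invented for illustration:

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Main.h"
#include "llvm/TableGen/Record.h"
using namespace llvm;

// Hypothetical single-output backend; returns true on error, as before.
static bool EmitMyTables(raw_ostream &OS, const RecordKeeper &Records) {
  OS << "// Records parsed from " << Records.getInputFilename() << "\n";
  return false;
}

int main(int argc, char **argv) {
  InitLLVM X(argc, argv);
  cl::ParseCommandLineOptions(argc, argv);
  // Resolves to the single-file overload taking TableGenMainFn.
  return TableGenMain(argv[0], EmitMyTables);
}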
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 6f44713bd22cd..8968f6b934d77 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -41,6 +41,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Statepoint.h" @@ -6676,6 +6677,62 @@ static MinMaxOptResult OptimizeConstMinMax(const Constant *RHSConst, return MinMaxOptResult::CannotOptimize; } +static Value *simplifySVEIntReduction(Intrinsic::ID IID, Type *ReturnType, + Value *Op0, Value *Op1) { + Constant *C0 = dyn_cast(Op0); + Constant *C1 = dyn_cast(Op1); + unsigned Width = ReturnType->getPrimitiveSizeInBits(); + + // All false predicate or reduction of neutral values ==> neutral result. + switch (IID) { + case Intrinsic::aarch64_sve_eorv: + case Intrinsic::aarch64_sve_orv: + case Intrinsic::aarch64_sve_saddv: + case Intrinsic::aarch64_sve_uaddv: + case Intrinsic::aarch64_sve_umaxv: + if ((C0 && C0->isNullValue()) || (C1 && C1->isNullValue())) + return ConstantInt::get(ReturnType, 0); + break; + case Intrinsic::aarch64_sve_andv: + case Intrinsic::aarch64_sve_uminv: + if ((C0 && C0->isNullValue()) || (C1 && C1->isAllOnesValue())) + return ConstantInt::get(ReturnType, APInt::getMaxValue(Width)); + break; + case Intrinsic::aarch64_sve_smaxv: + if ((C0 && C0->isNullValue()) || (C1 && C1->isMinSignedValue())) + return ConstantInt::get(ReturnType, APInt::getSignedMinValue(Width)); + break; + case Intrinsic::aarch64_sve_sminv: + if ((C0 && C0->isNullValue()) || (C1 && C1->isMaxSignedValue())) + return ConstantInt::get(ReturnType, APInt::getSignedMaxValue(Width)); + break; + } + + switch (IID) { + case Intrinsic::aarch64_sve_andv: + case Intrinsic::aarch64_sve_orv: + case Intrinsic::aarch64_sve_smaxv: + case Intrinsic::aarch64_sve_sminv: + case Intrinsic::aarch64_sve_umaxv: + case Intrinsic::aarch64_sve_uminv: + // sve_reduce_##(all, splat(X)) ==> X + if (C0 && C0->isAllOnesValue()) { + if (Value *SplatVal = getSplatValue(Op1)) { + assert(SplatVal->getType() == ReturnType && "Unexpected result type!"); + return SplatVal; + } + } + break; + case Intrinsic::aarch64_sve_eorv: + // sve_reduce_xor(all, splat(X)) ==> 0 + if (C0 && C0->isAllOnesValue()) + return ConstantInt::get(ReturnType, 0); + break; + } + + return nullptr; +} + Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType, Value *Op0, Value *Op1, const SimplifyQuery &Q, @@ -7037,6 +7094,17 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType, break; } + + case Intrinsic::aarch64_sve_andv: + case Intrinsic::aarch64_sve_eorv: + case Intrinsic::aarch64_sve_orv: + case Intrinsic::aarch64_sve_saddv: + case Intrinsic::aarch64_sve_smaxv: + case Intrinsic::aarch64_sve_sminv: + case Intrinsic::aarch64_sve_uaddv: + case Intrinsic::aarch64_sve_umaxv: + case Intrinsic::aarch64_sve_uminv: + return simplifySVEIntReduction(IID, ReturnType, Op0, Op1); default: break; } diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp index d35c17e3afd2b..877ae146ce79f 100644 --- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -616,9 +616,8 @@ void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) { /// EnumerateValueSymbolTable - Insert all of the values in the specified symbol /// 
table into the values table. void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) { - for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end(); - VI != VE; ++VI) - EnumerateValue(VI->getValue()); + for (const auto &VI : VST) + EnumerateValue(VI.getValue()); } /// Insert all of the values referenced by named metadata in the specified diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 7068eb440e31b..fbb35cedf5274 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -432,6 +432,8 @@ class CodeGenPrepare { bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy, unsigned AddrSpace); bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr); + bool optimizeMulWithOverflow(Instruction *I, bool IsSigned, + ModifyDT &ModifiedDT); bool optimizeInlineAsmInst(CallInst *CS); bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT); bool optimizeExt(Instruction *&I); @@ -2798,6 +2800,10 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) { } } return false; + case Intrinsic::umul_with_overflow: + return optimizeMulWithOverflow(II, /*IsSigned=*/false, ModifiedDT); + case Intrinsic::smul_with_overflow: + return optimizeMulWithOverflow(II, /*IsSigned=*/true, ModifiedDT); } SmallVector PtrOps; @@ -6437,6 +6443,182 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst, return true; } +// This is a helper for CodeGenPrepare::optimizeMulWithOverflow. +// Check the pattern we are interested in where there are maximum 2 uses +// of the intrinsic which are the extract instructions. +static bool matchOverflowPattern(Instruction *&I, ExtractValueInst *&MulExtract, + ExtractValueInst *&OverflowExtract) { + // Bail out if it's more than 2 users: + if (I->hasNUsesOrMore(3)) + return false; + + for (User *U : I->users()) { + auto *Extract = dyn_cast(U); + if (!Extract || Extract->getNumIndices() != 1) + return false; + + unsigned Index = Extract->getIndices()[0]; + if (Index == 0) + MulExtract = Extract; + else if (Index == 1) + OverflowExtract = Extract; + else + return false; + } + return true; +} + +// Rewrite the mul_with_overflow intrinsic by checking if both of the +// operands' value ranges are within the legal type. If so, we can optimize the +// multiplication algorithm. This code is supposed to be written during the step +// of type legalization, but given that we need to reconstruct the IR which is +// not doable there, we do it here. +// The IR after the optimization will look like: +// entry: +// if signed: +// ( (lhs_lo>>BW-1) ^ lhs_hi) || ( (rhs_lo>>BW-1) ^ rhs_hi) ? overflow, +// overflow_no +// else: +// (lhs_hi != 0) || (rhs_hi != 0) ? overflow, overflow_no +// overflow_no: +// overflow: +// overflow.res: +// \returns true if optimization was applied +// TODO: This optimization can be further improved to optimize branching on +// overflow where the 'overflow_no' BB can branch directly to the false +// successor of overflow, but that would add additional complexity so we leave +// it for future work. +bool CodeGenPrepare::optimizeMulWithOverflow(Instruction *I, bool IsSigned, + ModifyDT &ModifiedDT) { + // Check if target supports this optimization. 
+ if (!TLI->shouldOptimizeMulOverflowWithZeroHighBits( + I->getContext(), + TLI->getValueType(*DL, I->getType()->getContainedType(0)))) + return false; + + ExtractValueInst *MulExtract = nullptr, *OverflowExtract = nullptr; + if (!matchOverflowPattern(I, MulExtract, OverflowExtract)) + return false; + + // Keep track of the instruction to stop reoptimizing it again. + InsertedInsts.insert(I); + + Value *LHS = I->getOperand(0); + Value *RHS = I->getOperand(1); + Type *Ty = LHS->getType(); + unsigned VTHalfBitWidth = Ty->getScalarSizeInBits() / 2; + Type *LegalTy = Ty->getWithNewBitWidth(VTHalfBitWidth); + + // New BBs: + BasicBlock *OverflowEntryBB = + I->getParent()->splitBasicBlock(I, "", /*Before*/ true); + OverflowEntryBB->takeName(I->getParent()); + // Keep the 'br' instruction that is generated as a result of the split to be + // erased/replaced later. + Instruction *OldTerminator = OverflowEntryBB->getTerminator(); + BasicBlock *NoOverflowBB = + BasicBlock::Create(I->getContext(), "overflow.no", I->getFunction()); + NoOverflowBB->moveAfter(OverflowEntryBB); + BasicBlock *OverflowBB = + BasicBlock::Create(I->getContext(), "overflow", I->getFunction()); + OverflowBB->moveAfter(NoOverflowBB); + + // BB overflow.entry: + IRBuilder<> Builder(OverflowEntryBB); + // Extract low and high halves of LHS: + Value *LoLHS = Builder.CreateTrunc(LHS, LegalTy, "lo.lhs"); + Value *HiLHS = Builder.CreateLShr(LHS, VTHalfBitWidth, "lhs.lsr"); + HiLHS = Builder.CreateTrunc(HiLHS, LegalTy, "hi.lhs"); + + // Extract low and high halves of RHS: + Value *LoRHS = Builder.CreateTrunc(RHS, LegalTy, "lo.rhs"); + Value *HiRHS = Builder.CreateLShr(RHS, VTHalfBitWidth, "rhs.lsr"); + HiRHS = Builder.CreateTrunc(HiRHS, LegalTy, "hi.rhs"); + + Value *IsAnyBitTrue; + if (IsSigned) { + Value *SignLoLHS = + Builder.CreateAShr(LoLHS, VTHalfBitWidth - 1, "sign.lo.lhs"); + Value *SignLoRHS = + Builder.CreateAShr(LoRHS, VTHalfBitWidth - 1, "sign.lo.rhs"); + Value *XorLHS = Builder.CreateXor(HiLHS, SignLoLHS); + Value *XorRHS = Builder.CreateXor(HiRHS, SignLoRHS); + Value *Or = Builder.CreateOr(XorLHS, XorRHS, "or.lhs.rhs"); + IsAnyBitTrue = Builder.CreateCmp(ICmpInst::ICMP_NE, Or, + ConstantInt::getNullValue(Or->getType())); + } else { + Value *CmpLHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiLHS, + ConstantInt::getNullValue(LegalTy)); + Value *CmpRHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiRHS, + ConstantInt::getNullValue(LegalTy)); + IsAnyBitTrue = Builder.CreateOr(CmpLHS, CmpRHS, "or.lhs.rhs"); + } + Builder.CreateCondBr(IsAnyBitTrue, OverflowBB, NoOverflowBB); + + // BB overflow.no: + Builder.SetInsertPoint(NoOverflowBB); + Value *ExtLoLHS, *ExtLoRHS; + if (IsSigned) { + ExtLoLHS = Builder.CreateSExt(LoLHS, Ty, "lo.lhs.ext"); + ExtLoRHS = Builder.CreateSExt(LoRHS, Ty, "lo.rhs.ext"); + } else { + ExtLoLHS = Builder.CreateZExt(LoLHS, Ty, "lo.lhs.ext"); + ExtLoRHS = Builder.CreateZExt(LoRHS, Ty, "lo.rhs.ext"); + } + + Value *Mul = Builder.CreateMul(ExtLoLHS, ExtLoRHS, "mul.overflow.no"); + + // Create the 'overflow.res' BB to merge the results of + // the two paths: + BasicBlock *OverflowResBB = I->getParent(); + OverflowResBB->setName("overflow.res"); + + // BB overflow.no: jump to overflow.res BB + Builder.CreateBr(OverflowResBB); + // No we don't need the old terminator in overflow.entry BB, erase it: + OldTerminator->eraseFromParent(); + + // BB overflow.res: + Builder.SetInsertPoint(OverflowResBB, OverflowResBB->getFirstInsertionPt()); + // Create PHI nodes to merge results from no.overflow BB and overflow BB to + // 
replace the extract instructions. + PHINode *OverflowResPHI = Builder.CreatePHI(Ty, 2), + *OverflowFlagPHI = + Builder.CreatePHI(IntegerType::getInt1Ty(I->getContext()), 2); + + // Add the incoming values from no.overflow BB and later from overflow BB. + OverflowResPHI->addIncoming(Mul, NoOverflowBB); + OverflowFlagPHI->addIncoming(ConstantInt::getFalse(I->getContext()), + NoOverflowBB); + + // Replace all users of MulExtract and OverflowExtract to use the PHI nodes. + if (MulExtract) { + MulExtract->replaceAllUsesWith(OverflowResPHI); + MulExtract->eraseFromParent(); + } + if (OverflowExtract) { + OverflowExtract->replaceAllUsesWith(OverflowFlagPHI); + OverflowExtract->eraseFromParent(); + } + + // Remove the intrinsic from parent (overflow.res BB) as it will be part of + // overflow BB + I->removeFromParent(); + // BB overflow: + I->insertInto(OverflowBB, OverflowBB->end()); + Builder.SetInsertPoint(OverflowBB, OverflowBB->end()); + Value *MulOverflow = Builder.CreateExtractValue(I, {0}, "mul.overflow"); + Value *OverflowFlag = Builder.CreateExtractValue(I, {1}, "overflow.flag"); + Builder.CreateBr(OverflowResBB); + + // Add The Extracted values to the PHINodes in the overflow.res BB. + OverflowResPHI->addIncoming(MulOverflow, OverflowBB); + OverflowFlagPHI->addIncoming(OverflowFlag, OverflowBB); + + ModifiedDT = ModifyDT::ModifyBBDT; + return true; +} + /// If there are any memory operands, use OptimizeMemoryInst to sink their /// address computing into the block when possible / profitable. bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { diff --git a/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp b/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp index 4d2d2da8a4445..2918ba1de652f 100644 --- a/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp +++ b/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp @@ -64,7 +64,6 @@ dwarf::CFIProgram DWARFCFIState::convert(MCCFIInstruction Directive) { /* CodeAlignmentFactor */ 1, /* DataAlignmentFactor */ 1, Context->getTargetTriple().getArch()); - auto MaybeCurrentRow = getCurrentUnwindRow(); switch (Directive.getOperation()) { case MCCFIInstruction::OpSameValue: CFIP.addInstruction(dwarf::DW_CFA_same_value, Directive.getRegister()); diff --git a/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt index ab287c7af60be..6be59b0890c44 100644 --- a/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/Debugging/CMakeLists.txt @@ -22,6 +22,7 @@ add_llvm_component_library(LLVMOrcDebugging BinaryFormat DebugInfoDWARF JITLink + Object OrcJIT OrcShared Support diff --git a/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp index 9f556b0d07a8b..653645ff03f15 100644 --- a/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Debugging/ELFDebugObjectPlugin.cpp @@ -1,4 +1,4 @@ -//===------- ELFDebugObjectPlugin.cpp - JITLink debug objects ---------===// +//===--------- ELFDebugObjectPlugin.cpp - JITLink debug objects -----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
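Returning to the CodeGenPrepare change above: matchOverflowPattern() only accepts an {s,u}mul.with.overflow call whose sole users are the two extractvalue instructions. A hedged sketch of how such IR is commonly built with IRBuilder (the helper emitCheckedMul is invented for illustration):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;

// Emits: %pair = call {iN, i1} @llvm.umul.with.overflow.iN(iN %lhs, iN %rhs)
//        %mul  = extractvalue {iN, i1} %pair, 0
//        %ovf  = extractvalue {iN, i1} %pair, 1
static Value *emitCheckedMul(IRBuilder<> &B, Value *LHS, Value *RHS,
                             Value *&OverflowFlag) {
  Value *Pair =
      B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
  Value *Product = B.CreateExtractValue(Pair, 0, "mul");
  OverflowFlag = B.CreateExtractValue(Pair, 1, "overflow");
  return Product;
}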
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index cbce8bd736102..a3aa5e9571657 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -183,6 +183,23 @@ bool Constant::isMinSignedValue() const { return false; } +bool Constant::isMaxSignedValue() const { + // Check for INT_MAX integers + if (const ConstantInt *CI = dyn_cast(this)) + return CI->isMaxValue(/*isSigned=*/true); + + // Check for FP which are bitcasted from INT_MAX integers + if (const ConstantFP *CFP = dyn_cast(this)) + return CFP->getValueAPF().bitcastToAPInt().isMaxSignedValue(); + + // Check for splats of INT_MAX values. + if (getType()->isVectorTy()) + if (const auto *SplatVal = getSplatValue()) + return SplatVal->isMaxSignedValue(); + + return false; +} + bool Constant::isNotMinSignedValue() const { // Check for INT_MIN integers if (const ConstantInt *CI = dyn_cast(this)) diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp index 3330b70cdc2e1..939e9c6bf5d2f 100644 --- a/llvm/lib/TableGen/Main.cpp +++ b/llvm/lib/TableGen/Main.cpp @@ -127,8 +127,7 @@ static int WriteOutput(const TGParser &Parser, const char *argv0, return 0; } -int llvm::TableGenMain(const char *argv0, - std::function MainFn) { +int llvm::TableGenMain(const char *argv0, MultiFileTableGenMainFn MainFn) { RecordKeeper Records; TGTimer &Timer = Records.getTimer(); @@ -209,8 +208,7 @@ int llvm::TableGenMain(const char *argv0, return 0; } -int llvm::TableGenMain(const char *argv0, - std::function MainFn) { +int llvm::TableGenMain(const char *argv0, TableGenMainFn MainFn) { return TableGenMain(argv0, [&MainFn](TableGenOutputFiles &OutFiles, const RecordKeeper &Records) { std::string S; diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index f1db05dda4e40..08466667c0fa5 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -4403,43 +4403,46 @@ bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert) { - if (auto CNode = dyn_cast(N)) { - uint64_t ImmVal = CNode->getZExtValue(); - SDLoc DL(N); - - if (Invert) - ImmVal = ~ImmVal; + uint64_t ImmVal; + if (auto CI = dyn_cast(N)) + ImmVal = CI->getZExtValue(); + else if (auto CFP = dyn_cast(N)) + ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + else + return false; - // Shift mask depending on type size. - switch (VT.SimpleTy) { - case MVT::i8: - ImmVal &= 0xFF; - ImmVal |= ImmVal << 8; - ImmVal |= ImmVal << 16; - ImmVal |= ImmVal << 32; - break; - case MVT::i16: - ImmVal &= 0xFFFF; - ImmVal |= ImmVal << 16; - ImmVal |= ImmVal << 32; - break; - case MVT::i32: - ImmVal &= 0xFFFFFFFF; - ImmVal |= ImmVal << 32; - break; - case MVT::i64: - break; - default: - llvm_unreachable("Unexpected type"); - } + if (Invert) + ImmVal = ~ImmVal; - uint64_t encoding; - if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) { - Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64); - return true; - } + // Shift mask depending on type size. 
+ switch (VT.SimpleTy) { + case MVT::i8: + ImmVal &= 0xFF; + ImmVal |= ImmVal << 8; + ImmVal |= ImmVal << 16; + ImmVal |= ImmVal << 32; + break; + case MVT::i16: + ImmVal &= 0xFFFF; + ImmVal |= ImmVal << 16; + ImmVal |= ImmVal << 32; + break; + case MVT::i32: + ImmVal &= 0xFFFFFFFF; + ImmVal |= ImmVal << 32; + break; + case MVT::i64: + break; + default: + llvm_unreachable("Unexpected type"); } - return false; + + uint64_t encoding; + if (!AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) + return false; + + Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64); + return true; } // SVE shift intrinsics allow shift amounts larger than the element's bitwidth. diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 42567883b2594..d21e19b2ecd46 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -18851,6 +18851,15 @@ bool AArch64TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, return (Index == 0 || Index == ResVT.getVectorMinNumElements()); } +bool AArch64TargetLowering::shouldOptimizeMulOverflowWithZeroHighBits( + LLVMContext &Context, EVT VT) const { + if (getTypeAction(Context, VT) != TypeExpandInteger) + return false; + + EVT LegalTy = EVT::getIntegerVT(Context, VT.getSizeInBits() / 2); + return getTypeAction(Context, LegalTy) == TargetLowering::TypeLegal; +} + /// Turn vector tests of the signbit in the form of: /// xor (sra X, elt_size(X)-1), -1 /// into: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 70bfae717fb76..be198e54cbcbf 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -333,6 +333,11 @@ class AArch64TargetLowering : public TargetLowering { return TargetLowering::shouldFormOverflowOp(Opcode, VT, true); } + // Return true if the target wants to optimize the mul overflow intrinsic + // for the given \p VT. 
+ bool shouldOptimizeMulOverflowWithZeroHighBits(LLVMContext &Context, + EVT VT) const override; + Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override; Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index c8c21c4822ffe..e99b3f8ff07e0 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -989,7 +989,7 @@ let Predicates = [HasSVE_or_SME] in { (DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>; // Duplicate FP immediate into all vector elements - let AddedComplexity = 2 in { + let AddedComplexity = 3 in { def : Pat<(nxv8f16 (splat_vector fpimm16:$imm8)), (FDUP_ZI_H fpimm16:$imm8)>; def : Pat<(nxv4f16 (splat_vector fpimm16:$imm8)), diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 1664f4ad0c8fa..1e771e1fb9403 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -347,6 +347,11 @@ def SVELogicalImm16Pat : ComplexPattern", def SVELogicalImm32Pat : ComplexPattern", []>; def SVELogicalImm64Pat : ComplexPattern", []>; +def SVELogicalFPImm16Pat : ComplexPattern", []>; +def SVELogicalFPImm32Pat : ComplexPattern", []>; +def SVELogicalFPImm64Pat : ComplexPattern", []>; +def SVELogicalBFPImmPat : ComplexPattern", []>; + def SVELogicalImm8NotPat : ComplexPattern", []>; def SVELogicalImm16NotPat : ComplexPattern", []>; def SVELogicalImm32NotPat : ComplexPattern", []>; @@ -2160,6 +2165,26 @@ multiclass sve_int_dup_mask_imm { (!cast(NAME) i64:$imm)>; def : Pat<(nxv2i64 (splat_vector (i64 (SVELogicalImm64Pat i64:$imm)))), (!cast(NAME) i64:$imm)>; + + def : Pat<(nxv8f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv4f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv2f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv4f32 (splat_vector (f32 (SVELogicalFPImm32Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv2f32 (splat_vector (f32 (SVELogicalFPImm32Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv2f64 (splat_vector (f64 (SVELogicalFPImm64Pat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + + def : Pat<(nxv8bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv4bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))), + (!cast(NAME) i64:$imm)>; + def : Pat<(nxv2bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))), + (!cast(NAME) i64:$imm)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp index 9af812960542c..b7078825928be 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp @@ -314,9 +314,7 @@ class SplitGraph { #endif bool empty() const { return Nodes.empty(); } - const iterator_range nodes() const { - return {Nodes.begin(), Nodes.end()}; - } + iterator_range nodes() const { return Nodes; } const Node &getNode(unsigned ID) const { return *Nodes[ID]; } unsigned getNumNodes() const { return Nodes.size(); } diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 67437f6969b27..8b2866260e9c9 100644 --- 
a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -930,6 +930,24 @@ def Discard : DXILOp<82, discard> { let stages = [Stages]; } +def DerivCoarseX : DXILOp<83, unary> { + let Doc = "computes the rate of change per stamp in x direction"; + let intrinsics = [IntrinSelect]; + let arguments = [OverloadTy]; + let result = OverloadTy; + let overloads = [Overloads]; + let stages = [Stages]; +} + +def DerivCoarseY : DXILOp<84, unary> { + let Doc = "computes the rate of change per stamp in y direction"; + let intrinsics = [IntrinSelect]; + let arguments = [OverloadTy]; + let result = OverloadTy; + let overloads = [Overloads]; + let stages = [Stages]; +} + def ThreadId : DXILOp<93, threadId> { let Doc = "Reads the thread ID"; let intrinsics = [IntrinSelect]; diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp index 6cacbf6564db2..a755dd522969d 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp @@ -64,6 +64,8 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable( case Intrinsic::dx_wave_reduce_usum: case Intrinsic::dx_imad: case Intrinsic::dx_umad: + case Intrinsic::dx_ddx_coarse: + case Intrinsic::dx_ddy_coarse: return true; default: return false; diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 47022b3f89a8b..76fd834fd7219 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -1697,11 +1697,16 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(unsigned BitWidth, MachineIRBuilder MIRBuilder(DepMBB, DepMBB.getFirstNonPHI()); const MachineInstr *NewMI = createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) { - return BuildMI(MIRBuilder.getMBB(), *MIRBuilder.getInsertPt(), - MIRBuilder.getDL(), TII.get(SPIRVOPcode)) - .addDef(createTypeVReg(CurMF->getRegInfo())) - .addImm(BitWidth) - .addImm(0); + auto NewTypeMI = BuildMI(MIRBuilder.getMBB(), *MIRBuilder.getInsertPt(), + MIRBuilder.getDL(), TII.get(SPIRVOPcode)) + .addDef(createTypeVReg(CurMF->getRegInfo())) + .addImm(BitWidth); + // Don't add Encoding to FP type + if (!Ty->isFloatTy()) { + return NewTypeMI.addImm(0); + } else { + return NewTypeMI; + } }); add(Ty, false, NewMI); return finishCreatingSPIRVType(Ty, NewMI); diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index fc87288a4a212..0653b4eb9dfe2 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -328,6 +328,8 @@ class SPIRVInstructionSelector : public InstructionSelector { MachineInstr &I) const; bool selectFrexp(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const; + bool selectDpdCoarse(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I, const unsigned DPdOpCode) const; // Utilities std::pair buildI32Constant(uint32_t Val, MachineInstr &I, @@ -371,6 +373,7 @@ class SPIRVInstructionSelector : public InstructionSelector { bool loadHandleBeforePosition(Register &HandleReg, const SPIRVType *ResType, GIntrinsic &HandleDef, MachineInstr &Pos) const; void decorateUsesAsNonUniform(Register &NonUniformReg) const; + void errorIfInstrOutsideShader(MachineInstr &I) const; }; bool sampledTypeIsSignedInteger(const llvm::Type *HandleType) { @@ -3140,6 +3143,58 @@ bool SPIRVInstructionSelector::wrapIntoSpecConstantOp( return Result; 
} +bool SPIRVInstructionSelector::selectDpdCoarse(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I, + const unsigned DPdOpCode) const { + // TODO: This should check specifically for Fragment Execution Model, but STI + // doesn't provide that information yet. See #167562 + errorIfInstrOutsideShader(I); + + // If the arg/result types are half then we need to wrap the instr in + // conversions to float + // This case occurs because a half arg/result is legal in HLSL but not spirv. + Register SrcReg = I.getOperand(2).getReg(); + SPIRVType *SrcType = GR.getSPIRVTypeForVReg(SrcReg); + unsigned BitWidth = std::min(GR.getScalarOrVectorBitWidth(SrcType), + GR.getScalarOrVectorBitWidth(ResType)); + if (BitWidth == 32) + return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(DPdOpCode)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(I.getOperand(2).getReg()); + + MachineIRBuilder MIRBuilder(I); + unsigned componentCount = GR.getScalarOrVectorComponentCount(SrcType); + SPIRVType *F32ConvertTy = GR.getOrCreateSPIRVFloatType(32, I, TII); + if (componentCount != 1) + F32ConvertTy = GR.getOrCreateSPIRVVectorType(F32ConvertTy, componentCount, + MIRBuilder, false); + + const TargetRegisterClass *RegClass = GR.getRegClass(SrcType); + Register ConvertToVReg = MRI->createVirtualRegister(RegClass); + Register DpdOpVReg = MRI->createVirtualRegister(RegClass); + + bool Result = + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFConvert)) + .addDef(ConvertToVReg) + .addUse(GR.getSPIRVTypeID(F32ConvertTy)) + .addUse(SrcReg) + .constrainAllUses(TII, TRI, RBI); + Result &= BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(DPdOpCode)) + .addDef(DpdOpVReg) + .addUse(GR.getSPIRVTypeID(F32ConvertTy)) + .addUse(ConvertToVReg) + .constrainAllUses(TII, TRI, RBI); + Result &= + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFConvert)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(DpdOpVReg) + .constrainAllUses(TII, TRI, RBI); + return Result; +} + bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { @@ -3528,7 +3583,12 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, case Intrinsic::spv_unpackhalf2x16: { return selectExtInst(ResVReg, ResType, I, GL::UnpackHalf2x16); } - + case Intrinsic::spv_ddx_coarse: { + return selectDpdCoarse(ResVReg, ResType, I, SPIRV::OpDPdxCoarse); + } + case Intrinsic::spv_ddy_coarse: { + return selectDpdCoarse(ResVReg, ResType, I, SPIRV::OpDPdyCoarse); + } default: { std::string DiagMsg; raw_string_ostream OS(DiagMsg); @@ -4694,6 +4754,17 @@ bool SPIRVInstructionSelector::loadHandleBeforePosition( .constrainAllUses(TII, TRI, RBI); } +void SPIRVInstructionSelector::errorIfInstrOutsideShader( + MachineInstr &I) const { + if (!STI.isShader()) { + std::string DiagMsg; + raw_string_ostream OS(DiagMsg); + I.print(OS, true, false, false, false); + DiagMsg += " is only supported in shaders.\n"; + report_fatal_error(DiagMsg.c_str(), false); + } +} + namespace llvm { InstructionSelector * createSPIRVInstructionSelector(const SPIRVTargetMachine &TM, diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index b8cd9c1358f00..bd754d17694b8 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -934,7 +934,8 @@ void RequirementHandler::initAvailableCapabilitiesForVulkan( Capability::UniformBufferArrayDynamicIndexing, 
Capability::SampledImageArrayDynamicIndexing, Capability::StorageBufferArrayDynamicIndexing, - Capability::StorageImageArrayDynamicIndexing}); + Capability::StorageImageArrayDynamicIndexing, + Capability::DerivativeControl}); // Became core in Vulkan 1.2 if (ST.isAtLeastSPIRVVer(VersionTuple(1, 5))) { @@ -2148,6 +2149,12 @@ void addInstrRequirements(const MachineInstr &MI, } break; } + case SPIRV::OpDPdxCoarse: + case SPIRV::OpDPdyCoarse: { + Reqs.addCapability(SPIRV::Capability::DerivativeControl); + break; + } + default: break; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 621f1868d3311..864e5dc67682c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -54688,11 +54688,14 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, KnownBits KnownAmt = DAG.computeKnownBits(ShAmt); // Check the shift amount is byte aligned. // Check the truncation doesn't use any shifted in (zero) top bits. - // Check the shift amount doesn't depend on the original load. + // Check the shift amount doesn't depend on the original load chain. if (KnownAmt.countMinTrailingZeros() >= 3 && KnownAmt.getMaxValue().ule(SrcVT.getSizeInBits() - VT.getSizeInBits()) && - !Ld->isPredecessorOf(ShAmt.getNode())) { + none_of(Ld->uses(), [&ShAmt](SDUse &Use) { + return Use.getResNo() == 1 && + Use.getUser()->isPredecessorOf(ShAmt.getNode()); + })) { EVT PtrVT = Ld->getBasePtr().getValueType(); SDValue PtrBitOfs = DAG.getZExtOrTrunc(ShAmt, DL, PtrVT); SDValue PtrByteOfs = diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 400efa94789d3..7df6bf15f9fdd 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -248,6 +248,11 @@ static cl::opt "platforms that support this"), cl::Hidden, cl::init(true)); +static cl::opt + ClShadowAddrSpace("asan-shadow-addr-space", + cl::desc("Address space for pointers to the shadow map"), + cl::Hidden, cl::init(0)); + static cl::opt ClWithIfuncSuppressRemat( "asan-with-ifunc-suppress-remat", cl::desc("Suppress rematerialization of dynamic shadow address by passing " @@ -503,6 +508,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, bool IsAMDGPU = TargetTriple.isAMDGPU(); bool IsHaiku = TargetTriple.isOSHaiku(); bool IsWasm = TargetTriple.isWasm(); + bool IsBPF = TargetTriple.isBPF(); ShadowMapping Mapping; @@ -579,6 +585,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, else if (IsHaiku && IsX86_64) Mapping.Offset = (kSmallX86_64ShadowOffsetBase & (kSmallX86_64ShadowOffsetAlignMask << Mapping.Scale)); + else if (IsBPF) + Mapping.Offset = kDynamicShadowSentinel; else Mapping.Offset = kDefaultShadowOffset64; } @@ -1942,7 +1950,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Type *ShadowTy = IntegerType::get(*C, std::max(8U, TypeStoreSize >> Mapping.Scale)); - Type *ShadowPtrTy = PointerType::get(*C, 0); + Type *ShadowPtrTy = PointerType::get(*C, ClShadowAddrSpace); Value *ShadowPtr = memToShadow(AddrLong, IRB); const uint64_t ShadowAlign = std::max(Alignment.valueOrOne().value() >> Mapping.Scale, 1); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index f533a47150a7b..741392247c0d6 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ 
b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -152,11 +152,12 @@ class VPBuilder { /// its underlying Instruction. VPInstruction *createNaryOp(unsigned Opcode, ArrayRef Operands, Instruction *Inst = nullptr, + const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {}, DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") { VPInstruction *NewVPInst = tryInsertInstruction( - new VPInstruction(Opcode, Operands, {}, MD, DL, Name)); + new VPInstruction(Opcode, Operands, Flags, MD, DL, Name)); NewVPInst->setUnderlyingValue(Inst); return NewVPInst; } @@ -329,7 +330,7 @@ class VPBuilder { else if (Opcode == Instruction::ZExt) Flags = VPIRFlags::NonNegFlagsTy(false); return tryInsertInstruction( - new VPWidenCastRecipe(Opcode, Op, ResultTy, Flags)); + new VPWidenCastRecipe(Opcode, Op, ResultTy, nullptr, Flags)); } VPScalarIVStepsRecipe * diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 356d759b94799..c680b6fca84cd 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7750,7 +7750,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(VPInstruction *VPI, }, Range); if (ShouldUseVectorIntrinsic) - return new VPWidenIntrinsicRecipe(*CI, ID, Ops, CI->getType(), *VPI, + return new VPWidenIntrinsicRecipe(*CI, ID, Ops, CI->getType(), *VPI, *VPI, VPI->getDebugLoc()); Function *Variant = nullptr; @@ -7804,7 +7804,8 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(VPInstruction *VPI, } Ops.push_back(VPI->getOperand(VPI->getNumOperands() - 1)); - return new VPWidenCallRecipe(CI, Variant, Ops, VPI->getDebugLoc()); + return new VPWidenCallRecipe(CI, Variant, Ops, *VPI, *VPI, + VPI->getDebugLoc()); } return nullptr; @@ -7842,7 +7843,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) { auto *SafeRHS = Builder.createSelect(Mask, Ops[1], One, VPI->getDebugLoc()); Ops[1] = SafeRHS; - return new VPWidenRecipe(*I, Ops, *VPI, VPI->getDebugLoc()); + return new VPWidenRecipe(*I, Ops, *VPI, *VPI, VPI->getDebugLoc()); } [[fallthrough]]; } @@ -7888,7 +7889,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) { // For other binops, the legacy cost model only checks the second operand. 
NewOps[1] = GetConstantViaSCEV(NewOps[1]); } - return new VPWidenRecipe(*I, NewOps, *VPI, VPI->getDebugLoc()); + return new VPWidenRecipe(*I, NewOps, *VPI, *VPI, VPI->getDebugLoc()); } case Instruction::ExtractValue: { SmallVector NewOps(VPI->operands()); @@ -7896,7 +7897,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) { assert(EVI->getNumIndices() == 1 && "Expected one extractvalue index"); unsigned Idx = EVI->getIndices()[0]; NewOps.push_back(Plan.getConstantInt(32, Idx)); - return new VPWidenRecipe(*I, NewOps, *VPI, VPI->getDebugLoc()); + return new VPWidenRecipe(*I, NewOps, *VPI, *VPI, VPI->getDebugLoc()); } }; } @@ -7981,7 +7982,8 @@ VPReplicateRecipe *VPRecipeBuilder::handleReplication(VPInstruction *VPI, (Range.Start.isScalable() && isa(I))) && "Should not predicate a uniform recipe"); auto *Recipe = - new VPReplicateRecipe(I, VPI->operands(), IsUniform, BlockInMask, *VPI); + new VPReplicateRecipe(I, VPI->operands(), IsUniform, BlockInMask, *VPI, + *VPI, VPI->getDebugLoc()); return Recipe; } @@ -8231,17 +8233,19 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, return nullptr; if (VPI->getOpcode() == Instruction::GetElementPtr) - return new VPWidenGEPRecipe(cast(Instr), R->operands()); + return new VPWidenGEPRecipe(cast(Instr), R->operands(), + *VPI, VPI->getDebugLoc()); if (VPI->getOpcode() == Instruction::Select) - return new VPWidenSelectRecipe(*cast(Instr), R->operands(), - *VPI); + return new VPWidenSelectRecipe(cast(Instr), R->operands(), *VPI, + *VPI, VPI->getDebugLoc()); if (Instruction::isCast(VPI->getOpcode())) { - auto *CastR = cast(R); auto *CI = cast(Instr); + auto *CastR = cast(VPI); return new VPWidenCastRecipe(CI->getOpcode(), VPI->getOperand(0), - CastR->getResultType(), *CI, *VPI); + CastR->getResultType(), CI, *VPI, *VPI, + VPI->getDebugLoc()); } return tryToWiden(VPI); @@ -8269,8 +8273,8 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction, SmallVector Ops; Ops.push_back(Plan.getOrAddLiveIn(Zero)); Ops.push_back(BinOp); - BinOp = new VPWidenRecipe(*ReductionI, Ops, VPIRMetadata(), - ReductionI->getDebugLoc()); + BinOp = new VPWidenRecipe(*ReductionI, Ops, VPIRFlags(*ReductionI), + VPIRMetadata(), ReductionI->getDebugLoc()); Builder.insert(BinOp->getDefiningRecipe()); ReductionOpcode = Instruction::Add; } @@ -8454,9 +8458,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) { // Only create recipe for the final invariant store of the reduction. if (Legal->isInvariantStoreOfReduction(SI)) { + auto *VPI = cast(SingleDef); auto *Recipe = new VPReplicateRecipe( - SI, R.operands(), true /* IsUniform */, nullptr /*Mask*/, - *cast(SingleDef)); + SI, R.operands(), true /* IsUniform */, nullptr /*Mask*/, *VPI, + *VPI, VPI->getDebugLoc()); Recipe->insertBefore(*MiddleVPBB, MBIP); } R.eraseFromParent(); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index fc29ab0c84093..fedbcfb6bd32a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -882,14 +882,6 @@ class VPIRFlags { /// A pure-virtual common base class for recipes defining a single VPValue and /// using IR flags. 
struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags { - VPRecipeWithIRFlags(const unsigned char SC, ArrayRef Operands, - DebugLoc DL = DebugLoc::getUnknown()) - : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags() {} - - VPRecipeWithIRFlags(const unsigned char SC, ArrayRef Operands, - Instruction &I) - : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()), VPIRFlags(I) {} - VPRecipeWithIRFlags(const unsigned char SC, ArrayRef Operands, const VPIRFlags &Flags, DebugLoc DL = DebugLoc::getUnknown()) @@ -1474,9 +1466,12 @@ class LLVM_ABI_FOR_TEST VPWidenRecipe : public VPRecipeWithIRFlags, VPIRMetadata(Metadata), Opcode(Opcode) {} VPWidenRecipe(Instruction &I, ArrayRef Operands, - const VPIRMetadata &Metadata, DebugLoc DL) - : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), - VPIRMetadata(Metadata), Opcode(I.getOpcode()) {} + const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {}, + DebugLoc DL = {}) + : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL), + VPIRMetadata(Metadata), Opcode(I.getOpcode()) { + setUnderlyingValue(&I); + } ~VPWidenRecipe() override = default; @@ -1517,30 +1512,22 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { public: VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, - CastInst &UI, const VPIRMetadata &Metadata) - : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), - VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) { - assert(UI.getOpcode() == Opcode && - "opcode of underlying cast doesn't match"); - } - VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, - const VPIRFlags &Flags = {}, + CastInst *CI = nullptr, const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {}, DebugLoc DL = DebugLoc::getUnknown()) : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL), VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) { assert(flagsValidForOpcode(Opcode) && "Set flags not supported for the provided opcode"); + setUnderlyingValue(CI); } ~VPWidenCastRecipe() override = default; VPWidenCastRecipe *clone() override { - auto *New = new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy, *this, - *this, getDebugLoc()); - if (auto *UV = getUnderlyingValue()) - New->setUnderlyingValue(UV); - return New; + return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy, + cast_or_null(getUnderlyingValue()), + *this, *this, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenCastSC) @@ -1585,13 +1572,17 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { public: VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef CallArguments, Type *Ty, + const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {}, DebugLoc DL = DebugLoc::getUnknown()) - : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI), + : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags, + DL), VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty), MayReadFromMemory(CI.mayReadFromMemory()), MayWriteToMemory(CI.mayWriteToMemory()), - MayHaveSideEffects(CI.mayHaveSideEffects()) {} + MayHaveSideEffects(CI.mayHaveSideEffects()) { + setUnderlyingValue(&CI); + } VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef CallArguments, Type *Ty, @@ -1617,7 +1608,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { VPWidenIntrinsicRecipe *clone() override { if (Value *CI = getUnderlyingValue()) return new VPWidenIntrinsicRecipe(*cast(CI), 
VectorIntrinsicID, - operands(), ResultTy, *this, + operands(), ResultTy, *this, *this, getDebugLoc()); return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy, *this, *this, getDebugLoc()); @@ -1671,10 +1662,11 @@ class LLVM_ABI_FOR_TEST VPWidenCallRecipe : public VPRecipeWithIRFlags, public: VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef CallArguments, - DebugLoc DL = DebugLoc::getUnknown()) - : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, - *cast(UV)), - VPIRMetadata(*cast(UV)), Variant(Variant) { + const VPIRFlags &Flags = {}, + const VPIRMetadata &Metadata = {}, DebugLoc DL = {}) + : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, Flags, DL), + VPIRMetadata(Metadata), Variant(Variant) { + setUnderlyingValue(UV); assert( isa(getOperand(getNumOperands() - 1)->getLiveInIRValue()) && "last operand must be the called function"); @@ -1684,7 +1676,7 @@ class LLVM_ABI_FOR_TEST VPWidenCallRecipe : public VPRecipeWithIRFlags, VPWidenCallRecipe *clone() override { return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(), - getDebugLoc()); + *this, *this, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenCallSC) @@ -1761,16 +1753,19 @@ class VPHistogramRecipe : public VPRecipeBase { /// instruction. struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { - VPWidenSelectRecipe(SelectInst &I, ArrayRef Operands, - const VPIRMetadata &MD = {}) - : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, I), - VPIRMetadata(MD) {} + VPWidenSelectRecipe(SelectInst *SI, ArrayRef Operands, + const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {}, + DebugLoc DL = {}) + : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, Flags, DL), + VPIRMetadata(MD) { + setUnderlyingValue(SI); + } ~VPWidenSelectRecipe() override = default; VPWidenSelectRecipe *clone() override { - return new VPWidenSelectRecipe(*cast(getUnderlyingInstr()), - operands(), *this); + return new VPWidenSelectRecipe(cast(getUnderlyingInstr()), + operands(), *this, *this, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC) @@ -1822,9 +1817,12 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags { } public: - VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef Operands) - : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP), + VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef Operands, + const VPIRFlags &Flags = {}, + DebugLoc DL = DebugLoc::getUnknown()) + : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, Flags, DL), SourceElementTy(GEP->getSourceElementType()) { + setUnderlyingValue(GEP); SmallVector> Metadata; (void)Metadata; getMetadataToPropagate(GEP, Metadata); @@ -1835,7 +1833,7 @@ class LLVM_ABI_FOR_TEST VPWidenGEPRecipe : public VPRecipeWithIRFlags { VPWidenGEPRecipe *clone() override { return new VPWidenGEPRecipe(cast(getUnderlyingInstr()), - operands()); + operands(), *this, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC) @@ -2929,10 +2927,12 @@ class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags, public: VPReplicateRecipe(Instruction *I, ArrayRef Operands, bool IsSingleScalar, VPValue *Mask = nullptr, - VPIRMetadata Metadata = {}) - : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I), + const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {}, + DebugLoc DL = DebugLoc::getUnknown()) + : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, Flags, DL), VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar), IsPredicated(Mask) { + setUnderlyingValue(I); if (Mask) 
addOperand(Mask); } @@ -2940,9 +2940,9 @@ class LLVM_ABI_FOR_TEST VPReplicateRecipe : public VPRecipeWithIRFlags, ~VPReplicateRecipe() override = default; VPReplicateRecipe *clone() override { - auto *Copy = - new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsSingleScalar, - isPredicated() ? getMask() : nullptr, *this); + auto *Copy = new VPReplicateRecipe( + getUnderlyingInstr(), operands(), IsSingleScalar, + isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc()); Copy->transferFlags(*this); return Copy; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 612202d049774..dbbde1cafa9f2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -190,7 +190,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, // recipes. if (Br->isConditional()) { VPValue *Cond = getOrCreateVPOperand(Br->getCondition()); - VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, + VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, {}, VPIRMetadata(*Inst), Inst->getDebugLoc()); } @@ -205,7 +205,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, SmallVector Ops = {getOrCreateVPOperand(SI->getCondition())}; for (auto Case : SI->cases()) Ops.push_back(getOrCreateVPOperand(Case.getCaseValue())); - VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, + VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, {}, VPIRMetadata(*Inst), Inst->getDebugLoc()); continue; } @@ -255,13 +255,14 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, if (auto *CI = dyn_cast(Inst)) { NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0], CI->getType(), CI->getDebugLoc(), - {}, MD); + VPIRFlags(*CI), MD); NewR->setUnderlyingValue(CI); } else { // Build VPInstruction for any arbitrary Instruction without specific // representation in VPlan. 
- NewR = VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst, MD, - Inst->getDebugLoc()); + NewR = + VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst, + VPIRFlags(*Inst), MD, Inst->getDebugLoc()); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index fca6554ad77c6..ef36e29aaa5c4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2056,24 +2056,26 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const { switch (OpType) { case OperationType::OverflowingBinOp: return Opcode == Instruction::Add || Opcode == Instruction::Sub || - Opcode == Instruction::Mul || + Opcode == Instruction::Mul || Opcode == Instruction::Shl || Opcode == VPInstruction::VPInstruction::CanonicalIVIncrementForPart; case OperationType::Trunc: return Opcode == Instruction::Trunc; case OperationType::DisjointOp: return Opcode == Instruction::Or; case OperationType::PossiblyExactOp: - return Opcode == Instruction::AShr; + return Opcode == Instruction::AShr || Opcode == Instruction::LShr || + Opcode == Instruction::UDiv || Opcode == Instruction::SDiv; case OperationType::GEPOp: return Opcode == Instruction::GetElementPtr || Opcode == VPInstruction::PtrAdd || Opcode == VPInstruction::WidePtrAdd; case OperationType::FPMathOp: - return Opcode == Instruction::FAdd || Opcode == Instruction::FMul || - Opcode == Instruction::FSub || Opcode == Instruction::FNeg || - Opcode == Instruction::FDiv || Opcode == Instruction::FRem || - Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc || - Opcode == Instruction::FCmp || Opcode == Instruction::Select || + return Opcode == Instruction::Call || Opcode == Instruction::FAdd || + Opcode == Instruction::FMul || Opcode == Instruction::FSub || + Opcode == Instruction::FNeg || Opcode == Instruction::FDiv || + Opcode == Instruction::FRem || Opcode == Instruction::FPExt || + Opcode == Instruction::FPTrunc || Opcode == Instruction::FCmp || + Opcode == Instruction::Select || Opcode == VPInstruction::WideIVStep || Opcode == VPInstruction::ReductionStartVector || Opcode == VPInstruction::ComputeReductionResult; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 26563242de283..25557f1d5d651 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -104,24 +104,26 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/, *VPI, Ingredient.getDebugLoc()); } else if (GetElementPtrInst *GEP = dyn_cast(Inst)) { - NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands()); + NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands(), *VPI, + Ingredient.getDebugLoc()); } else if (CallInst *CI = dyn_cast(Inst)) { Intrinsic::ID VectorID = getVectorIntrinsicIDForCall(CI, &TLI); if (VectorID == Intrinsic::not_intrinsic) return false; NewRecipe = new VPWidenIntrinsicRecipe( *CI, getVectorIntrinsicIDForCall(CI, &TLI), - drop_end(Ingredient.operands()), CI->getType(), *VPI, - CI->getDebugLoc()); + drop_end(Ingredient.operands()), CI->getType(), VPIRFlags(*CI), + *VPI, CI->getDebugLoc()); } else if (SelectInst *SI = dyn_cast(Inst)) { - NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands(), *VPI); + NewRecipe = new VPWidenSelectRecipe(SI, Ingredient.operands(), *VPI, + *VPI, Ingredient.getDebugLoc()); } else if (auto *CI = dyn_cast(Inst)) { - 
NewRecipe = - new VPWidenCastRecipe(CI->getOpcode(), Ingredient.getOperand(0), - CI->getType(), *CI, *VPI); + NewRecipe = new VPWidenCastRecipe( + CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI, + VPIRFlags(*CI), VPIRMetadata(*CI)); } else { NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands(), *VPI, - Ingredient.getDebugLoc()); + *VPI, Ingredient.getDebugLoc()); } } @@ -226,7 +228,8 @@ static bool sinkScalarOperands(VPlan &Plan) { // then cloning should be sufficient here. Instruction *I = SinkCandidate->getUnderlyingInstr(); Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true, - nullptr /*Mask*/, *SinkCandidateRepR); + nullptr /*Mask*/, *SinkCandidateRepR, + *SinkCandidateRepR); // TODO: add ".cloned" suffix to name of Clone's VPValue. } else { Clone = SinkCandidate->clone(); @@ -385,7 +388,8 @@ static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe, // mask but in the replicate region. auto *RecipeWithoutMask = new VPReplicateRecipe( PredRecipe->getUnderlyingInstr(), drop_end(PredRecipe->operands()), - PredRecipe->isSingleScalar(), nullptr /*Mask*/, *PredRecipe); + PredRecipe->isSingleScalar(), nullptr /*Mask*/, *PredRecipe, *PredRecipe, + PredRecipe->getDebugLoc()); auto *Pred = Plan.createVPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask); @@ -691,7 +695,7 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) { // analysis. auto Users = collectUsersRecursively(PhiR); for (VPUser *U : reverse(Users)) { - auto *Def = dyn_cast(U); + auto *Def = dyn_cast(U); auto *RepR = dyn_cast(U); // Skip recipes that shouldn't be narrowed. if (!Def || !isa(Def) || @@ -704,7 +708,8 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) { continue; auto *Clone = new VPReplicateRecipe(Def->getUnderlyingInstr(), - Def->operands(), /*IsUniform*/ true); + Def->operands(), /*IsUniform*/ true, + /*Mask*/ nullptr, /*Flags*/ *Def); Clone->insertAfter(Def); Def->replaceAllUsesWith(Clone); } @@ -1423,12 +1428,13 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { if (RepR && (RepR->isSingleScalar() || RepR->isPredicated())) continue; - auto *RepOrWidenR = cast(&R); + auto *RepOrWidenR = cast(&R); if (RepR && isa(RepR->getUnderlyingInstr()) && vputils::isSingleScalar(RepR->getOperand(1))) { auto *Clone = new VPReplicateRecipe( RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(), - true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/); + true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Flags*/, + *RepR /*Metadata*/, RepR->getDebugLoc()); Clone->insertBefore(RepOrWidenR); unsigned ExtractOpc = vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1)) @@ -1469,9 +1475,9 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { })) continue; - auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(), - RepOrWidenR->operands(), - true /*IsSingleScalar*/); + auto *Clone = new VPReplicateRecipe( + RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(), + true /*IsSingleScalar*/, nullptr, *RepOrWidenR); Clone->insertBefore(RepOrWidenR); RepOrWidenR->replaceAllUsesWith(Clone); if (isDeadRecipe(*RepOrWidenR)) @@ -3824,15 +3830,15 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, Ext0->getOpcode() == Ext1->getOpcode() && IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) { auto *NewExt0 = new VPWidenCastRecipe( - Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), *Ext0, - *Ext0, Ext0->getDebugLoc()); + Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), nullptr, + 
*Ext0, *Ext0, Ext0->getDebugLoc()); NewExt0->insertBefore(Ext0); VPWidenCastRecipe *NewExt1 = NewExt0; if (Ext0 != Ext1) { NewExt1 = new VPWidenCastRecipe(Ext1->getOpcode(), Ext1->getOperand(0), - Ext->getResultType(), *Ext1, *Ext1, - Ext1->getDebugLoc()); + Ext->getResultType(), nullptr, *Ext1, + *Ext1, Ext1->getDebugLoc()); NewExt1->insertBefore(Ext1); } Mul->setOperand(0, NewExt0); @@ -4353,7 +4359,7 @@ narrowInterleaveGroupOp(VPValue *V, SmallPtrSetImpl &NarrowedOps) { // process one original iteration. auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(), {PtrOp}, /*IsUniform*/ true, - /*Mask*/ nullptr, *WideLoad); + /*Mask*/ nullptr, {}, *WideLoad); N->insertBefore(WideLoad); NarrowedOps.insert(N); return N; diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index d4b8b72beb942..d76d2ed5f1c76 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -518,9 +518,9 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, // TODO: have cloning of replicate recipes also provide the desired result // coupled with setting its operands to NewOps (deriving IsSingleScalar and // Mask from the operands?) - New = - new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps, - /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR); + New = new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps, + /*IsSingleScalar=*/true, /*Mask=*/nullptr, + *RepR, *RepR, RepR->getDebugLoc()); } else { assert(isa(DefR) && "DefR must be a VPReplicateRecipe or VPInstruction"); diff --git a/llvm/test/CodeGen/AArch64/i128-math.ll b/llvm/test/CodeGen/AArch64/i128-math.ll index 9e1c0c1b115ab..12ae241dda4bd 100644 --- a/llvm/test/CodeGen/AArch64/i128-math.ll +++ b/llvm/test/CodeGen/AArch64/i128-math.ll @@ -262,20 +262,28 @@ define i128 @u128_mul(i128 %x, i128 %y) { define { i128, i8 } @u128_checked_mul(i128 %x, i128 %y) { ; CHECK-LABEL: u128_checked_mul: ; CHECK: // %bb.0: +; CHECK-NEXT: orr x8, x1, x3 +; CHECK-NEXT: cbz x8, .LBB17_2 +; CHECK-NEXT: // %bb.1: // %overflow ; CHECK-NEXT: mul x9, x3, x0 ; CHECK-NEXT: cmp x1, #0 ; CHECK-NEXT: ccmp x3, #0, #4, ne -; CHECK-NEXT: umulh x8, x1, x2 -; CHECK-NEXT: umulh x10, x3, x0 +; CHECK-NEXT: umulh x10, x1, x2 +; CHECK-NEXT: umulh x8, x3, x0 ; CHECK-NEXT: madd x9, x1, x2, x9 -; CHECK-NEXT: ccmp xzr, x8, #0, eq -; CHECK-NEXT: umulh x11, x0, x2 ; CHECK-NEXT: ccmp xzr, x10, #0, eq +; CHECK-NEXT: umulh x11, x0, x2 +; CHECK-NEXT: ccmp xzr, x8, #0, eq ; CHECK-NEXT: mul x0, x0, x2 ; CHECK-NEXT: cset w8, ne ; CHECK-NEXT: adds x1, x11, x9 ; CHECK-NEXT: csinc w8, w8, wzr, lo ; CHECK-NEXT: eor w2, w8, #0x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB17_2: // %overflow.no +; CHECK-NEXT: umulh x1, x0, x2 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: eor w2, w8, #0x1 ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -290,19 +298,27 @@ define { i128, i8 } @u128_checked_mul(i128 %x, i128 %y) { define { i128, i8 } @u128_overflowing_mul(i128 %x, i128 %y) { ; CHECK-LABEL: u128_overflowing_mul: ; CHECK: // %bb.0: +; CHECK-NEXT: orr x8, x1, x3 +; CHECK-NEXT: cbz x8, .LBB18_2 +; CHECK-NEXT: // %bb.1: // %overflow ; CHECK-NEXT: mul x9, x3, x0 ; CHECK-NEXT: cmp x1, #0 ; CHECK-NEXT: ccmp x3, #0, #4, ne -; CHECK-NEXT: umulh x8, x1, x2 -; CHECK-NEXT: umulh x10, x3, x0 +; CHECK-NEXT: umulh x10, x1, x2 +; CHECK-NEXT: umulh x8, x3, x0 ; CHECK-NEXT: madd x9, x1, x2, x9 -; CHECK-NEXT: ccmp xzr, x8, #0, eq -; 
CHECK-NEXT: umulh x11, x0, x2 ; CHECK-NEXT: ccmp xzr, x10, #0, eq +; CHECK-NEXT: umulh x11, x0, x2 +; CHECK-NEXT: ccmp xzr, x8, #0, eq ; CHECK-NEXT: mul x0, x0, x2 ; CHECK-NEXT: cset w8, ne ; CHECK-NEXT: adds x1, x11, x9 ; CHECK-NEXT: csinc w2, w8, wzr, lo +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB18_2: // %overflow.no +; CHECK-NEXT: umulh x1, x0, x2 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: mov w2, wzr ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -316,19 +332,28 @@ define { i128, i8 } @u128_overflowing_mul(i128 %x, i128 %y) { define i128 @u128_saturating_mul(i128 %x, i128 %y) { ; CHECK-LABEL: u128_saturating_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: mul x9, x3, x0 +; CHECK-NEXT: orr x8, x1, x3 +; CHECK-NEXT: cbz x8, .LBB19_2 +; CHECK-NEXT: // %bb.1: // %overflow +; CHECK-NEXT: mul x8, x3, x0 ; CHECK-NEXT: cmp x1, #0 ; CHECK-NEXT: ccmp x3, #0, #4, ne -; CHECK-NEXT: umulh x8, x1, x2 -; CHECK-NEXT: umulh x10, x3, x0 -; CHECK-NEXT: madd x9, x1, x2, x9 -; CHECK-NEXT: ccmp xzr, x8, #0, eq -; CHECK-NEXT: umulh x11, x0, x2 +; CHECK-NEXT: umulh x10, x1, x2 +; CHECK-NEXT: umulh x9, x3, x0 +; CHECK-NEXT: madd x11, x1, x2, x8 ; CHECK-NEXT: ccmp xzr, x10, #0, eq +; CHECK-NEXT: umulh x12, x0, x2 +; CHECK-NEXT: ccmp xzr, x9, #0, eq ; CHECK-NEXT: mul x8, x0, x2 ; CHECK-NEXT: cset w10, ne -; CHECK-NEXT: adds x9, x11, x9 +; CHECK-NEXT: adds x9, x12, x11 ; CHECK-NEXT: csinc w10, w10, wzr, lo +; CHECK-NEXT: b .LBB19_3 +; CHECK-NEXT: .LBB19_2: // %overflow.no +; CHECK-NEXT: umulh x9, x0, x2 +; CHECK-NEXT: mov w10, wzr +; CHECK-NEXT: mul x8, x0, x2 +; CHECK-NEXT: .LBB19_3: // %overflow.res ; CHECK-NEXT: cmp w10, #0 ; CHECK-NEXT: csinv x0, x8, xzr, eq ; CHECK-NEXT: csinv x1, x9, xzr, eq @@ -355,6 +380,11 @@ define i128 @i128_mul(i128 %x, i128 %y) { define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) { ; CHECK-LABEL: i128_checked_mul: ; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x3, x2, asr #63 +; CHECK-NEXT: eor x9, x1, x0, asr #63 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cbz x8, .LBB21_2 +; CHECK-NEXT: // %bb.1: // %overflow ; CHECK-NEXT: asr x9, x1, #63 ; CHECK-NEXT: umulh x10, x0, x2 ; CHECK-NEXT: asr x13, x3, #63 @@ -364,24 +394,30 @@ define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) { ; CHECK-NEXT: adds x10, x11, x10 ; CHECK-NEXT: mul x14, x0, x3 ; CHECK-NEXT: umulh x12, x0, x3 -; CHECK-NEXT: adc x9, x8, x9 +; CHECK-NEXT: adc x8, x8, x9 +; CHECK-NEXT: mov x9, x1 ; CHECK-NEXT: mul x13, x0, x13 -; CHECK-NEXT: adds x8, x14, x10 -; CHECK-NEXT: mul x15, x1, x3 -; CHECK-NEXT: smulh x10, x1, x3 -; CHECK-NEXT: mov x1, x8 -; CHECK-NEXT: adc x11, x12, x13 -; CHECK-NEXT: asr x12, x9, #63 -; CHECK-NEXT: asr x13, x11, #63 -; CHECK-NEXT: adds x9, x9, x11 ; CHECK-NEXT: asr x11, x8, #63 +; CHECK-NEXT: mul x15, x1, x3 +; CHECK-NEXT: adds x1, x14, x10 +; CHECK-NEXT: smulh x9, x9, x3 +; CHECK-NEXT: adc x10, x12, x13 +; CHECK-NEXT: asr x12, x10, #63 +; CHECK-NEXT: adds x8, x8, x10 +; CHECK-NEXT: asr x10, x1, #63 ; CHECK-NEXT: mul x0, x0, x2 -; CHECK-NEXT: adc x12, x12, x13 -; CHECK-NEXT: adds x9, x15, x9 -; CHECK-NEXT: adc x10, x10, x12 -; CHECK-NEXT: cmp x9, x11 -; CHECK-NEXT: ccmp x10, x11, #0, eq -; CHECK-NEXT: cset w2, eq +; CHECK-NEXT: adc x11, x11, x12 +; CHECK-NEXT: adds x8, x15, x8 +; CHECK-NEXT: adc x9, x9, x11 +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: ccmp x9, x10, #0, eq +; CHECK-NEXT: cset w8, ne +; CHECK-NEXT: eor w2, w8, #0x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB21_2: // %overflow.no +; CHECK-NEXT: smulh x1, x0, x2 
+; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: eor w2, w8, #0x1 ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -396,6 +432,11 @@ define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) { define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) { ; CHECK-LABEL: i128_overflowing_mul: ; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x3, x2, asr #63 +; CHECK-NEXT: eor x9, x1, x0, asr #63 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cbz x8, .LBB22_2 +; CHECK-NEXT: // %bb.1: // %overflow ; CHECK-NEXT: asr x9, x1, #63 ; CHECK-NEXT: umulh x10, x0, x2 ; CHECK-NEXT: asr x13, x3, #63 @@ -405,24 +446,29 @@ define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) { ; CHECK-NEXT: adds x10, x11, x10 ; CHECK-NEXT: mul x14, x0, x3 ; CHECK-NEXT: umulh x12, x0, x3 -; CHECK-NEXT: adc x9, x8, x9 +; CHECK-NEXT: adc x8, x8, x9 +; CHECK-NEXT: mov x9, x1 ; CHECK-NEXT: mul x13, x0, x13 -; CHECK-NEXT: adds x8, x14, x10 -; CHECK-NEXT: mul x15, x1, x3 -; CHECK-NEXT: smulh x10, x1, x3 -; CHECK-NEXT: mov x1, x8 -; CHECK-NEXT: adc x11, x12, x13 -; CHECK-NEXT: asr x12, x9, #63 -; CHECK-NEXT: asr x13, x11, #63 -; CHECK-NEXT: adds x9, x9, x11 ; CHECK-NEXT: asr x11, x8, #63 +; CHECK-NEXT: mul x15, x1, x3 +; CHECK-NEXT: adds x1, x14, x10 +; CHECK-NEXT: smulh x9, x9, x3 +; CHECK-NEXT: adc x10, x12, x13 +; CHECK-NEXT: asr x12, x10, #63 +; CHECK-NEXT: adds x8, x8, x10 +; CHECK-NEXT: asr x10, x1, #63 ; CHECK-NEXT: mul x0, x0, x2 -; CHECK-NEXT: adc x12, x12, x13 -; CHECK-NEXT: adds x9, x15, x9 -; CHECK-NEXT: adc x10, x10, x12 -; CHECK-NEXT: cmp x9, x11 -; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: adc x11, x11, x12 +; CHECK-NEXT: adds x8, x15, x8 +; CHECK-NEXT: adc x9, x9, x11 +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: ccmp x9, x10, #0, eq ; CHECK-NEXT: cset w2, ne +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB22_2: // %overflow.no +; CHECK-NEXT: smulh x1, x0, x2 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: mov w2, wzr ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -436,6 +482,11 @@ define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) { define i128 @i128_saturating_mul(i128 %x, i128 %y) { ; CHECK-LABEL: i128_saturating_mul: ; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x3, x2, asr #63 +; CHECK-NEXT: eor x9, x1, x0, asr #63 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cbz x8, .LBB23_2 +; CHECK-NEXT: // %bb.1: // %overflow ; CHECK-NEXT: asr x9, x1, #63 ; CHECK-NEXT: umulh x10, x0, x2 ; CHECK-NEXT: asr x13, x3, #63 @@ -445,29 +496,35 @@ define i128 @i128_saturating_mul(i128 %x, i128 %y) { ; CHECK-NEXT: adds x10, x11, x10 ; CHECK-NEXT: mul x14, x0, x3 ; CHECK-NEXT: umulh x12, x0, x3 -; CHECK-NEXT: adc x8, x8, x9 +; CHECK-NEXT: adc x9, x8, x9 ; CHECK-NEXT: mul x13, x0, x13 -; CHECK-NEXT: adds x9, x14, x10 -; CHECK-NEXT: mul x11, x1, x3 -; CHECK-NEXT: adc x10, x12, x13 -; CHECK-NEXT: smulh x12, x1, x3 -; CHECK-NEXT: asr x13, x8, #63 -; CHECK-NEXT: asr x14, x10, #63 -; CHECK-NEXT: adds x8, x8, x10 -; CHECK-NEXT: adc x10, x13, x14 -; CHECK-NEXT: adds x8, x11, x8 -; CHECK-NEXT: asr x11, x9, #63 -; CHECK-NEXT: mul x13, x0, x2 -; CHECK-NEXT: adc x10, x12, x10 -; CHECK-NEXT: eor x12, x3, x1 -; CHECK-NEXT: eor x8, x8, x11 -; CHECK-NEXT: eor x10, x10, x11 -; CHECK-NEXT: asr x11, x12, #63 -; CHECK-NEXT: orr x8, x8, x10 -; CHECK-NEXT: eor x10, x11, #0x7fffffffffffffff -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: csinv x0, x13, x11, eq -; CHECK-NEXT: csel x1, x10, x9, ne +; 
CHECK-NEXT: adds x8, x14, x10 +; CHECK-NEXT: mul x15, x1, x3 +; CHECK-NEXT: asr x14, x8, #63 +; CHECK-NEXT: smulh x10, x1, x3 +; CHECK-NEXT: adc x11, x12, x13 +; CHECK-NEXT: asr x12, x9, #63 +; CHECK-NEXT: asr x13, x11, #63 +; CHECK-NEXT: adds x11, x9, x11 +; CHECK-NEXT: mul x9, x0, x2 +; CHECK-NEXT: adc x12, x12, x13 +; CHECK-NEXT: adds x11, x15, x11 +; CHECK-NEXT: adc x10, x10, x12 +; CHECK-NEXT: cmp x11, x14 +; CHECK-NEXT: ccmp x10, x14, #0, eq +; CHECK-NEXT: cset w10, ne +; CHECK-NEXT: b .LBB23_3 +; CHECK-NEXT: .LBB23_2: // %overflow.no +; CHECK-NEXT: smulh x8, x0, x2 +; CHECK-NEXT: mov w10, wzr +; CHECK-NEXT: mul x9, x0, x2 +; CHECK-NEXT: .LBB23_3: // %overflow.res +; CHECK-NEXT: eor x11, x3, x1 +; CHECK-NEXT: cmp w10, #0 +; CHECK-NEXT: asr x11, x11, #63 +; CHECK-NEXT: eor x12, x11, #0x7fffffffffffffff +; CHECK-NEXT: csinv x0, x9, x11, eq +; CHECK-NEXT: csel x1, x12, x8, ne ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 diff --git a/llvm/test/CodeGen/AArch64/i128_with_overflow.ll b/llvm/test/CodeGen/AArch64/i128_with_overflow.ll index 9924b7c63f763..3d90e094a5747 100644 --- a/llvm/test/CodeGen/AArch64/i128_with_overflow.ll +++ b/llvm/test/CodeGen/AArch64/i128_with_overflow.ll @@ -224,21 +224,29 @@ cleanup: define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) { ; CHECK-LABEL: test_umul_i128: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr x8, x1, x3 +; CHECK-NEXT: cbz x8, .LBB4_2 +; CHECK-NEXT: // %bb.1: // %overflow ; CHECK-NEXT: mul x9, x3, x0 ; CHECK-NEXT: cmp x1, #0 ; CHECK-NEXT: ccmp x3, #0, #4, ne -; CHECK-NEXT: umulh x8, x1, x2 -; CHECK-NEXT: umulh x10, x3, x0 +; CHECK-NEXT: umulh x10, x1, x2 +; CHECK-NEXT: umulh x8, x3, x0 ; CHECK-NEXT: madd x9, x1, x2, x9 -; CHECK-NEXT: ccmp xzr, x8, #0, eq -; CHECK-NEXT: umulh x11, x0, x2 ; CHECK-NEXT: ccmp xzr, x10, #0, eq +; CHECK-NEXT: umulh x11, x0, x2 +; CHECK-NEXT: ccmp xzr, x8, #0, eq +; CHECK-NEXT: mul x0, x0, x2 ; CHECK-NEXT: cset w8, ne ; CHECK-NEXT: adds x1, x11, x9 ; CHECK-NEXT: csinc w8, w8, wzr, lo -; CHECK-NEXT: cmp w8, #1 -; CHECK-NEXT: b.ne .LBB4_2 -; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: cbnz w8, .LBB4_3 +; CHECK-NEXT: b .LBB4_4 +; CHECK-NEXT: .LBB4_2: // %overflow.no +; CHECK-NEXT: umulh x1, x0, x2 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: cbz w8, .LBB4_4 +; CHECK-NEXT: .LBB4_3: // %if.then ; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 @@ -247,9 +255,7 @@ define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) { ; CHECK-NEXT: sxtw x0, w0 ; CHECK-NEXT: asr x1, x0, #63 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB4_2: // %if.end -; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: .LBB4_4: // %cleanup ; CHECK-NEXT: ret entry: %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y) @@ -273,34 +279,40 @@ cleanup: define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) { ; CHECK-LABEL: test_smul_i128: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: asr x10, x1, #63 -; CHECK-NEXT: umulh x11, x0, x2 -; CHECK-NEXT: asr x14, x3, #63 -; CHECK-NEXT: mov x8, x1 -; CHECK-NEXT: mul x12, x1, x2 -; CHECK-NEXT: umulh x9, x1, x2 -; CHECK-NEXT: mul x10, x10, x2 -; CHECK-NEXT: adds x11, x12, x11 -; CHECK-NEXT: mul x15, x0, x3 -; CHECK-NEXT: umulh x13, x0, x3 -; CHECK-NEXT: adc x9, x9, x10 -; CHECK-NEXT: mul x14, x0, x14 -; CHECK-NEXT: mul x16, x1, x3 -; CHECK-NEXT: adds x1, x15, x11 -; CHECK-NEXT: asr x11, x9, #63 -; CHECK-NEXT: smulh x8, x8, x3 -; CHECK-NEXT: adc x10, x13, x14 -; CHECK-NEXT: asr x12, x10, #63 -; CHECK-NEXT: adds x9, x9, x10 -; CHECK-NEXT: adc x10, x11, x12 -; CHECK-NEXT: adds x9, x16, x9 -; CHECK-NEXT: asr x11, x1, #63 -; CHECK-NEXT: adc x8, x8, x10 -; CHECK-NEXT: eor x8, x8, x11 -; CHECK-NEXT: eor x9, x9, x11 +; CHECK-NEXT: eor x8, x3, x2, asr #63 +; CHECK-NEXT: eor x9, x1, x0, asr #63 ; CHECK-NEXT: orr x8, x9, x8 -; CHECK-NEXT: cbz x8, .LBB5_2 -; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: cbz x8, .LBB5_4 +; CHECK-NEXT: // %bb.1: // %overflow +; CHECK-NEXT: asr x9, x1, #63 +; CHECK-NEXT: umulh x10, x0, x2 +; CHECK-NEXT: asr x13, x3, #63 +; CHECK-NEXT: mul x11, x1, x2 +; CHECK-NEXT: umulh x8, x1, x2 +; CHECK-NEXT: mul x9, x9, x2 +; CHECK-NEXT: adds x10, x11, x10 +; CHECK-NEXT: mul x14, x0, x3 +; CHECK-NEXT: umulh x12, x0, x3 +; CHECK-NEXT: adc x8, x8, x9 +; CHECK-NEXT: mov x9, x1 +; CHECK-NEXT: mul x13, x0, x13 +; CHECK-NEXT: asr x11, x8, #63 +; CHECK-NEXT: mul x15, x1, x3 +; CHECK-NEXT: adds x1, x14, x10 +; CHECK-NEXT: smulh x9, x9, x3 +; CHECK-NEXT: adc x10, x12, x13 +; CHECK-NEXT: asr x12, x10, #63 +; CHECK-NEXT: adds x8, x8, x10 +; CHECK-NEXT: asr x10, x1, #63 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: adc x11, x11, x12 +; CHECK-NEXT: adds x8, x15, x8 +; CHECK-NEXT: adc x9, x9, x11 +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: ccmp x9, x10, #0, eq +; CHECK-NEXT: cset w8, ne +; CHECK-NEXT: cbz w8, .LBB5_3 +; CHECK-NEXT: .LBB5_2: // %if.then ; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 @@ -309,10 +321,13 @@ define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) { ; CHECK-NEXT: sxtw x0, w0 ; CHECK-NEXT: asr x1, x0, #63 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .LBB5_3: // %cleanup ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB5_2: // %if.end +; CHECK-NEXT: .LBB5_4: // %overflow.no +; CHECK-NEXT: smulh x1, x0, x2 ; CHECK-NEXT: mul x0, x0, x2 -; CHECK-NEXT: ret +; CHECK-NEXT: cbnz w8, .LBB5_2 +; CHECK-NEXT: b .LBB5_3 entry: %0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y) %1 = extractvalue { i128, i1 } %0, 1 diff --git a/llvm/test/CodeGen/AArch64/mul-i128-overflow.ll b/llvm/test/CodeGen/AArch64/mul-i128-overflow.ll new file mode 100644 index 0000000000000..7b60f81539aa8 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/mul-i128-overflow.ll @@ -0,0 +1,261 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64 -o - %s | FileCheck %s + + +declare i32 @error() + +define i128 @test1(i128 noundef %x, i128 noundef %y) { +; CHECK-LABEL: test1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: eor x8, x3, x2, asr #63 +; CHECK-NEXT: eor x9, x1, x0, asr #63 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cbz x8, .LBB0_4 +; CHECK-NEXT: // %bb.1: // %overflow +; CHECK-NEXT: asr x9, x1, #63 +; CHECK-NEXT: umulh x10, x0, x2 +; CHECK-NEXT: asr x13, x3, #63 +; CHECK-NEXT: mul x11, x1, x2 +; CHECK-NEXT: umulh x8, x1, x2 +; CHECK-NEXT: mul x9, x9, x2 +; CHECK-NEXT: adds x10, x11, x10 +; CHECK-NEXT: mul x14, x0, x3 +; CHECK-NEXT: umulh x12, x0, x3 +; CHECK-NEXT: adc x8, x8, x9 +; CHECK-NEXT: mov x9, x1 +; CHECK-NEXT: mul x13, x0, x13 +; CHECK-NEXT: asr x11, x8, #63 +; CHECK-NEXT: mul x15, x1, x3 +; CHECK-NEXT: adds x1, x14, x10 +; CHECK-NEXT: smulh x9, x9, x3 +; CHECK-NEXT: adc x10, x12, x13 +; CHECK-NEXT: asr x12, x10, #63 +; CHECK-NEXT: adds x8, x8, x10 +; CHECK-NEXT: asr x10, x1, #63 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: adc x11, x11, x12 +; CHECK-NEXT: adds x8, x15, x8 +; CHECK-NEXT: adc x9, x9, x11 +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: ccmp x9, x10, #0, eq +; CHECK-NEXT: cset w8, ne +; CHECK-NEXT: cbz w8, .LBB0_3 +; CHECK-NEXT: .LBB0_2: // %if.then +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl error +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtw x0, w0 +; CHECK-NEXT: asr x1, x0, #63 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .LBB0_3: // %cleanup +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_4: // %overflow.no +; CHECK-NEXT: smulh x1, x0, x2 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: cbnz w8, .LBB0_2 +; CHECK-NEXT: b .LBB0_3 +entry: + %0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y) + %1 = extractvalue { i128, i1 } %0, 1 + br i1 %1, label %if.then, label %if.end + +if.then: + %call = tail call i32 @error() + %conv1 = sext i32 %call to i128 + br label %cleanup + +if.end: + %2 = extractvalue { i128, i1 } %0, 0 + br label %cleanup + +cleanup: + %retval.0 = phi i128 [ %conv1, %if.then ], [ %2, %if.end ] + ret i128 %retval.0 +} + +define i128 @test2(i128 noundef %x, i128 noundef %y, ptr %out) { +; CHECK-LABEL: test2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: eor x8, x3, x2, asr #63 +; CHECK-NEXT: eor x9, x1, x0, asr #63 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cbz x8, .LBB1_4 +; CHECK-NEXT: // %bb.1: // %overflow +; CHECK-NEXT: asr x9, x1, #63 +; CHECK-NEXT: umulh x10, x0, x2 +; CHECK-NEXT: asr x13, x3, #63 +; CHECK-NEXT: mul x11, x1, x2 +; CHECK-NEXT: umulh x8, x1, x2 +; CHECK-NEXT: mul x9, x9, x2 +; CHECK-NEXT: adds x10, x11, x10 +; CHECK-NEXT: mul x14, x0, x3 +; CHECK-NEXT: umulh x12, x0, x3 +; CHECK-NEXT: adc x8, x8, x9 +; CHECK-NEXT: mov x9, x1 +; CHECK-NEXT: mul x13, x0, x13 +; CHECK-NEXT: asr x11, x8, #63 +; CHECK-NEXT: mul x15, x1, x3 +; CHECK-NEXT: adds x1, x14, x10 +; CHECK-NEXT: smulh x9, x9, x3 +; CHECK-NEXT: adc x10, x12, x13 +; CHECK-NEXT: asr x12, x10, #63 +; CHECK-NEXT: adds x8, x8, x10 +; CHECK-NEXT: asr x10, x1, #63 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: adc x11, x11, x12 +; CHECK-NEXT: adds x8, x15, x8 +; CHECK-NEXT: adc x9, x9, x11 +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: ccmp x9, x10, #0, eq +; CHECK-NEXT: cset w8, ne +; CHECK-NEXT: stp x0, x1, [x4] +; CHECK-NEXT: cbz w8, .LBB1_3 +; CHECK-NEXT: .LBB1_2: // %if.then +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl error +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtw x0, w0 +; CHECK-NEXT: asr x1, x0, #63 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .LBB1_3: // %cleanup +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_4: // %overflow.no +; CHECK-NEXT: smulh x1, x0, x2 +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: stp x0, x1, [x4] +; CHECK-NEXT: cbnz w8, .LBB1_2 +; CHECK-NEXT: b .LBB1_3 +entry: + %0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y) + %1 = extractvalue { i128, i1 } %0, 0 + store i128 %1, ptr %out + %2 = extractvalue { i128, i1 } %0, 1 + br i1 %2, label %if.then, label %cleanup + +if.then: + %call = tail call i32 @error() + %conv1 = sext i32 %call to i128 + br label %cleanup + +cleanup: + %retval.0 = phi i128 [ %conv1, %if.then ], [ %1, %entry ] + ret i128 %retval.0 +} + +define i128 @test3(i128 noundef %x, i128 noundef %y, ptr %out) { +; CHECK-LABEL: test3: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr x8, x1, x3 +; CHECK-NEXT: cbz x8, .LBB2_3 +; CHECK-NEXT: // %bb.1: // %overflow +; CHECK-NEXT: mul x8, x3, x0 +; CHECK-NEXT: cmp x1, #0 +; CHECK-NEXT: ccmp x3, #0, #4, ne +; CHECK-NEXT: umulh x10, x1, x2 +; CHECK-NEXT: umulh x9, x3, x0 +; CHECK-NEXT: madd x11, x1, x2, x8 +; CHECK-NEXT: ccmp xzr, x10, #0, eq +; CHECK-NEXT: umulh x12, x0, x2 +; CHECK-NEXT: ccmp xzr, x9, #0, eq +; CHECK-NEXT: mul x8, x0, x2 +; CHECK-NEXT: cset w10, ne +; CHECK-NEXT: adds x9, x12, x11 +; CHECK-NEXT: csinc w10, w10, wzr, lo +; CHECK-NEXT: stp x8, x9, [x4] +; CHECK-NEXT: cbnz w10, .LBB2_4 +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: mov x1, xzr +; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_3: // %overflow.no +; CHECK-NEXT: umulh x9, x0, x2 +; CHECK-NEXT: mov w10, wzr +; CHECK-NEXT: mul x8, x0, x2 +; CHECK-NEXT: stp x8, x9, [x4] +; CHECK-NEXT: cbz w10, .LBB2_2 +; CHECK-NEXT: .LBB2_4: // %if.then +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl error +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtw x0, w0 +; CHECK-NEXT: asr x1, x0, #63 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y) + %1 = extractvalue { i128, i1 } %0, 0 + store i128 %1, ptr %out + %2 = extractvalue { i128, i1 } %0, 1 + br i1 %2, label %if.then, label %cleanup + +if.then: + %call = tail call i32 @error() + %conv1 = sext i32 %call to i128 + br label %cleanup + +cleanup: + %retval.0 = phi i128 [ %conv1, %if.then ], [ 1, %entry ] + ret i128 %retval.0 +} + +define i128 @test4(i128 noundef %x, i128 noundef %y, i128 %out) { +; CHECK-LABEL: test4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr x8, x1, x3 +; CHECK-NEXT: cbz x8, .LBB3_2 +; CHECK-NEXT: // %bb.1: // %overflow +; CHECK-NEXT: mul x8, x3, x0 +; CHECK-NEXT: cmp x1, #0 +; CHECK-NEXT: ccmp x3, #0, #4, ne +; CHECK-NEXT: umulh x10, x1, x2 +; CHECK-NEXT: umulh x9, x3, x0 +; CHECK-NEXT: madd x11, x1, x2, x8 +; CHECK-NEXT: ccmp xzr, x10, #0, eq +; CHECK-NEXT: umulh x12, x0, x2 +; CHECK-NEXT: ccmp xzr, x9, #0, eq +; CHECK-NEXT: mul x8, x0, x2 +; CHECK-NEXT: cset w10, ne +; CHECK-NEXT: adds x9, x12, x11 +; CHECK-NEXT: csinc w10, w10, wzr, lo +; CHECK-NEXT: b .LBB3_3 +; CHECK-NEXT: .LBB3_2: // %overflow.no +; CHECK-NEXT: umulh x9, x0, x2 +; CHECK-NEXT: mov w10, wzr +; CHECK-NEXT: mul x8, x0, x2 +; CHECK-NEXT: .LBB3_3: // %overflow.res +; CHECK-NEXT: adds x0, x8, x4 +; CHECK-NEXT: adc x1, x9, x5 +; CHECK-NEXT: cbz w10, .LBB3_5 +; CHECK-NEXT: // %bb.4: // %if.then +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl error +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtw x0, w0 +; CHECK-NEXT: asr x1, x0, #63 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .LBB3_5: // %cleanup +; CHECK-NEXT: ret +entry: + %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y) + %1 = extractvalue { i128, i1 } %0, 0 + %res = add i128 %1, %out + %2 = extractvalue { i128, i1 } %0, 1 + br i1 %2, label %if.then, label %cleanup + +if.then: + %call = tail call i32 @error() + %conv1 = sext i32 %call to i128 + br label %cleanup + +cleanup: + %retval.0 = phi i128 [ %conv1, %if.then ], [ %res, %entry ] + ret i128 %retval.0 +} diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll index 16e8feb0dc5bb..fc3e018f2ec7a 100644 --- a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll +++ b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll @@ -632,7 +632,6 @@ define @fsub_sel_fmul_negzero_nxv8bf16( @fsub_sel_fmul_negzero_nxv8bf16( @fsub_sel_fmul_negzero_nxv8bf16( @fadd_sel_fmul_d_negzero( %a, define @fsub_sel_fmul_h_negzero( %a, %b, %c, %mask) { ; CHECK-LABEL: fsub_sel_fmul_h_negzero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 +; CHECK-NEXT: dupm z3.h, #0x8000 ; CHECK-NEXT: fmul z1.h, z1.h, z2.h -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h +; CHECK-NEXT: sel z1.h, p0, z1.h, z3.h ; CHECK-NEXT: fsub z0.h, z0.h, z1.h ; CHECK-NEXT: ret %fmul = fmul %b, %c @@ -1150,10 +1149,9 @@ define @fsub_sel_fmul_h_negzero( %a, @fsub_sel_fmul_s_negzero( %a, %b, %c, %mask) { ; CHECK-LABEL: fsub_sel_fmul_s_negzero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 
+; CHECK-NEXT: mov z3.s, #0x80000000 ; CHECK-NEXT: fmul z1.s, z1.s, z2.s -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s +; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s ; CHECK-NEXT: fsub z0.s, z0.s, z1.s ; CHECK-NEXT: ret %fmul = fmul %b, %c @@ -1166,10 +1164,9 @@ define @fsub_sel_fmul_s_negzero( %a, @fsub_sel_fmul_d_negzero( %a, %b, %c, %mask) { ; CHECK-LABEL: fsub_sel_fmul_d_negzero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: fmul z1.d, z1.d, z2.d -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d +; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d ; CHECK-NEXT: fsub z0.d, z0.d, z1.d ; CHECK-NEXT: ret %fmul = fmul %b, %c diff --git a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll index 8750867c56731..1223ae1c0cbdd 100644 --- a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll @@ -51,10 +51,9 @@ define half @fadda_nxv6f16( %v, half %s) { ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: mov w8, #32768 // =0x8000 +; CHECK-NEXT: dupm z2.h, #0x8000 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: str z0, [sp] -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fmov s0, s1 ; CHECK-NEXT: st1h { z2.d }, p0, [sp, #3, mul vl] ; CHECK-NEXT: ptrue p0.h @@ -77,12 +76,11 @@ define half @fadda_nxv10f16( %v, half %s) { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 -; CHECK-NEXT: mov w8, #32768 // =0x8000 ; CHECK-NEXT: str z1, [sp] +; CHECK-NEXT: addvl x8, sp, #1 ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: fadda h2, p0, h2, z0.h -; CHECK-NEXT: mov z0.h, w8 -; CHECK-NEXT: addvl x8, sp, #1 +; CHECK-NEXT: dupm z0.h, #0x8000 ; CHECK-NEXT: st1h { z0.d }, p1, [sp, #1, mul vl] ; CHECK-NEXT: ldr z1, [sp] ; CHECK-NEXT: str z1, [sp, #1, mul vl] @@ -105,11 +103,10 @@ define half @fadda_nxv12f16( %v, half %s) { ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 -; CHECK-NEXT: mov w8, #32768 // =0x8000 +; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: fadda h2, p0, h2, z0.h -; CHECK-NEXT: uunpklo z0.s, z1.h -; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h +; CHECK-NEXT: dupm z0.h, #0x8000 +; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h ; CHECK-NEXT: fadda h2, p0, h2, z0.h ; CHECK-NEXT: fmov s0, s2 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll index 4ae7ac7b292e9..897ade00320db 100644 --- a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll +++ b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll @@ -454,18 +454,17 @@ declare @llvm.fptosi.sat.nxv4f16.nxv4i64() define @test_signed_v2f16_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z2.d, #0xffffffff80000000 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.d, #0xffffffff80000000 -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.d, #0x7fffffff +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: fcvtzs z2.d, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z1.d, 
#0x7fffffff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.h -; CHECK-NEXT: sel z0.d, p2, z2.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f16.nxv2i32( %f) @@ -475,18 +474,17 @@ define @test_signed_v2f16_v2i32( %f) { define @test_signed_v4f16_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z2.s, #0x80000000 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.s, #0x80000000 -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.s, #0x7fffffff +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: fcvtzs z2.s, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z1.s, #0x7fffffff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: fcvtzs z1.s, p1/m, z0.h -; CHECK-NEXT: sel z0.s, p2, z2.s, z1.s +; CHECK-NEXT: sel z0.s, p1, z1.s, z2.s ; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f16.nxv4i32( %f) @@ -496,26 +494,25 @@ define @test_signed_v4f16_v4i32( %f) { define @test_signed_v8f16_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: uunpklo z1.s, z0.h +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff +; CHECK-NEXT: uunpklo z2.s, z0.h +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: uunpkhi z0.s, z0.h -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: mov z3.s, #0x80000000 ; CHECK-NEXT: mov z4.s, #0x80000000 ; CHECK-NEXT: mov z5.h, w8 -; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z2.h -; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.s, #0x7fffffff +; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, z1.h +; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z1.s, #0x7fffffff ; CHECK-NEXT: fcmgt p3.h, p0/z, z0.h, z5.h -; CHECK-NEXT: fcvtzs z3.s, p1/m, z1.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z5.h +; CHECK-NEXT: fcvtzs z3.s, p1/m, z2.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z5.h ; CHECK-NEXT: fcvtzs z4.s, p2/m, z0.h -; CHECK-NEXT: fcmuo p2.h, p0/z, z1.h, z1.h +; CHECK-NEXT: fcmuo p2.h, p0/z, z2.h, z2.h ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: sel z0.s, p1, z2.s, z3.s -; CHECK-NEXT: sel z1.s, p3, z2.s, z4.s +; CHECK-NEXT: sel z0.s, p1, z1.s, z3.s +; CHECK-NEXT: sel z1.s, p3, z1.s, z4.s ; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 ; CHECK-NEXT: ret @@ -526,18 +523,17 @@ define @test_signed_v8f16_v8i32( %f) { define @test_signed_v4f16_v4i16( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #63488 // =0xf800 +; CHECK-NEXT: dupm z1.h, #0xf800 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z2.s, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #30719 // =0x77ff +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.s, #32767 // =0x7fff -; CHECK-NEXT: fcvtzs z2.s, p1/m, z0.h +; CHECK-NEXT: mov z1.s, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcvtzs z1.s, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.s, #32767 // =0x7fff ; CHECK-NEXT: 
fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: sel z0.s, p2, z1.s, z2.s +; CHECK-NEXT: sel z0.s, p1, z2.s, z1.s ; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f16.nxv4i16( %f) @@ -547,18 +543,17 @@ define @test_signed_v4f16_v4i16( %f) { define @test_signed_v8f16_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #63488 // =0xf800 +; CHECK-NEXT: dupm z1.h, #0xf800 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z2.h, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #30719 // =0x77ff +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, #32767 // =0x7fff -; CHECK-NEXT: fcvtzs z2.h, p1/m, z0.h +; CHECK-NEXT: mov z1.h, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcvtzs z1.h, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.h, #32767 // =0x7fff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: sel z0.h, p2, z1.h, z2.h +; CHECK-NEXT: sel z0.h, p1, z2.h, z1.h ; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f16.nxv8i16( %f) @@ -568,18 +563,17 @@ define @test_signed_v8f16_v8i16( %f) { define @test_signed_v2f16_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff -; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.d, #0x8000000000000000 -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: fcvtzs z2.d, p1/m, z0.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z1.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.h -; CHECK-NEXT: sel z0.d, p2, z2.d, z1.d +; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f16.nxv2i64( %f) @@ -589,26 +583,25 @@ define @test_signed_v2f16_v2i64( %f) { define @test_signed_v4f16_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff +; CHECK-NEXT: uunpklo z2.d, z0.s +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: mov z4.d, #0x8000000000000000 ; CHECK-NEXT: mov z5.h, w8 -; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z2.h -; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, z1.h +; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z1.d, #0x7fffffffffffffff ; CHECK-NEXT: fcmgt p3.h, p0/z, z0.h, z5.h -; CHECK-NEXT: fcvtzs z3.d, p1/m, z1.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z5.h +; CHECK-NEXT: fcvtzs z3.d, p1/m, z2.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z5.h ; CHECK-NEXT: fcvtzs z4.d, p2/m, z0.h -; CHECK-NEXT: fcmuo p2.h, p0/z, z1.h, z1.h +; CHECK-NEXT: fcmuo p2.h, p0/z, z2.h, z2.h ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: sel z0.d, p1, z2.d, z3.d -; 
CHECK-NEXT: sel z1.d, p3, z2.d, z4.d +; CHECK-NEXT: sel z0.d, p1, z1.d, z3.d +; CHECK-NEXT: sel z1.d, p3, z1.d, z4.d ; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-llrint.ll b/llvm/test/CodeGen/AArch64/sve-llrint.ll index f964d70e0a05c..c2bb0c81ab405 100644 --- a/llvm/test/CodeGen/AArch64/sve-llrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-llrint.ll @@ -5,9 +5,8 @@ define @llrint_v1i64_v1f16( %x) { ; CHECK-LABEL: llrint_v1i64_v1f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h @@ -28,9 +27,8 @@ define @llrint_v1i64_v2f16( %x) { ; CHECK-LABEL: llrint_v1i64_v2f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h @@ -52,10 +50,9 @@ define @llrint_v4i64_v4f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: mov z4.d, #0x8000000000000000 ; CHECK-NEXT: mov z5.d, #0x7fffffffffffffff @@ -92,10 +89,9 @@ define @llrint_v8i64_v8f16( %x) { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z1.s, z0.h ; CHECK-NEXT: uunpkhi z0.s, z0.h -; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z4.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z4.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z5.d, #0x8000000000000000 ; CHECK-NEXT: mov z6.d, #0x8000000000000000 ; CHECK-NEXT: mov z7.d, #0x8000000000000000 @@ -162,12 +158,13 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z2.s, z0.h ; CHECK-NEXT: uunpkhi z3.s, z0.h -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: uunpklo z7.s, z1.h ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z0.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: uunpkhi z1.s, z1.h -; CHECK-NEXT: mov z0.d, #0x8000000000000000 ; CHECK-NEXT: mov z5.d, #0x8000000000000000 +; CHECK-NEXT: mov z29.h, w8 ; CHECK-NEXT: mov z31.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z4.d, z2.s ; CHECK-NEXT: uunpklo z24.d, z3.s @@ -175,10 +172,8 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: uunpkhi z6.d, z2.s ; CHECK-NEXT: uunpklo z26.d, z7.s ; CHECK-NEXT: uunpkhi z7.d, z7.s -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z30.d, z1.s -; CHECK-NEXT: mov z29.h, w8 ; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: movprfx z27, z4 @@ -191,17 +186,17 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: frintx z26.h, p0/m, z26.h ; CHECK-NEXT: frintx z7.h, p0/m, z7.h ; CHECK-NEXT: mov z6.d, #0x8000000000000000 -; CHECK-NEXT: fcmge p1.h, p0/z, z27.h, z2.h 
-; CHECK-NEXT: fcmge p3.h, p0/z, z24.h, z2.h -; CHECK-NEXT: fcmge p4.h, p0/z, z25.h, z2.h -; CHECK-NEXT: fcmge p2.h, p0/z, z28.h, z2.h -; CHECK-NEXT: fcmge p5.h, p0/z, z26.h, z2.h -; CHECK-NEXT: fcvtzs z0.d, p1/m, z27.h +; CHECK-NEXT: fcmge p1.h, p0/z, z27.h, z0.h +; CHECK-NEXT: fcmge p3.h, p0/z, z24.h, z0.h +; CHECK-NEXT: fcmge p4.h, p0/z, z25.h, z0.h +; CHECK-NEXT: fcmge p2.h, p0/z, z28.h, z0.h +; CHECK-NEXT: fcmge p5.h, p0/z, z26.h, z0.h +; CHECK-NEXT: fcvtzs z2.d, p1/m, z27.h ; CHECK-NEXT: fcvtzs z4.d, p3/m, z24.h ; CHECK-NEXT: fcvtzs z5.d, p4/m, z25.h ; CHECK-NEXT: fcmgt p3.h, p0/z, z27.h, z29.h ; CHECK-NEXT: fcvtzs z3.d, p2/m, z28.h -; CHECK-NEXT: fcmge p4.h, p0/z, z7.h, z2.h +; CHECK-NEXT: fcmge p4.h, p0/z, z7.h, z0.h ; CHECK-NEXT: fcvtzs z6.d, p5/m, z26.h ; CHECK-NEXT: fcmuo p1.h, p0/z, z27.h, z27.h ; CHECK-NEXT: movprfx z27, z30 @@ -212,7 +207,7 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: fcmuo p2.h, p0/z, z28.h, z28.h ; CHECK-NEXT: mov z28.d, #0x8000000000000000 ; CHECK-NEXT: fcvtzs z31.d, p4/m, z7.h -; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z2.h +; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z0.h ; CHECK-NEXT: fcmgt p6.h, p0/z, z24.h, z29.h ; CHECK-NEXT: fcmuo p7.h, p0/z, z24.h, z24.h ; CHECK-NEXT: mov z24.d, #0x7fffffffffffffff @@ -221,31 +216,31 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: fcmuo p10.h, p0/z, z25.h, z25.h ; CHECK-NEXT: mov z25.d, #0x8000000000000000 ; CHECK-NEXT: sel z1.d, p5, z24.d, z3.d -; CHECK-NEXT: mov z0.d, p3/m, z24.d ; CHECK-NEXT: sel z3.d, p8, z24.d, z5.d -; CHECK-NEXT: fcmge p4.h, p0/z, z30.h, z2.h +; CHECK-NEXT: fcmge p4.h, p0/z, z30.h, z0.h +; CHECK-NEXT: sel z0.d, p3, z24.d, z2.d ; CHECK-NEXT: sel z2.d, p6, z24.d, z4.d -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z3.d, p10/m, #0 // =0x0 ; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Reload +; CHECK-NEXT: fcmgt p9.h, p0/z, z26.h, z29.h ; CHECK-NEXT: mov z2.d, p7/m, #0 // =0x0 ; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload -; CHECK-NEXT: fcmgt p9.h, p0/z, z26.h, z29.h ; CHECK-NEXT: fcvtzs z25.d, p4/m, z30.h +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: fcmgt p5.h, p0/z, z7.h, z29.h ; CHECK-NEXT: fcmgt p6.h, p0/z, z27.h, z29.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z30.h, z29.h ; CHECK-NEXT: sel z4.d, p9, z24.d, z6.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z30.h, z29.h ; CHECK-NEXT: fcmuo p8.h, p0/z, z7.h, z7.h ; CHECK-NEXT: sel z5.d, p5, z24.d, z31.d ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: sel z6.d, p6, z24.d, z28.d ; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p9.h, p0/z, z27.h, z27.h +; CHECK-NEXT: fcmuo p3.h, p0/z, z26.h, z26.h ; CHECK-NEXT: sel z7.d, p4, z24.d, z25.d ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload -; CHECK-NEXT: fcmuo p3.h, p0/z, z26.h, z26.h ; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 ; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p0.h, p0/z, z30.h, z30.h @@ -302,48 +297,47 @@ define @llrint_v32i64_v32f16( %x) { ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x40, 0x1c // $d14 @ cfa - 56 * VG - 16 ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x40, 0x1c // $d15 @ cfa - 64 * VG - 16 ; CHECK-NEXT: uunpklo z4.s, z0.h -; CHECK-NEXT: uunpkhi z5.s, z0.h -; CHECK-NEXT: mov w9, #64511 // =0xfbff -; CHECK-NEXT: uunpklo z6.s, z1.h -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpkhi z28.s, z1.h -; CHECK-NEXT: mov z30.h, w9 +; CHECK-NEXT: uunpkhi z0.s, 
z0.h ; CHECK-NEXT: mov w9, #31743 // =0x7bff +; CHECK-NEXT: uunpklo z5.s, z1.h +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z28.h, #-1025 // =0xfffffffffffffbff +; CHECK-NEXT: uunpkhi z29.s, z1.h +; CHECK-NEXT: mov z7.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z13.s, z2.h ; CHECK-NEXT: mov z9.d, #0x8000000000000000 ; CHECK-NEXT: uunpkhi z14.s, z2.h ; CHECK-NEXT: uunpkhi z17.s, z3.h -; CHECK-NEXT: uunpklo z7.d, z4.s +; CHECK-NEXT: uunpklo z6.d, z4.s ; CHECK-NEXT: uunpkhi z4.d, z4.s -; CHECK-NEXT: uunpklo z27.d, z5.s -; CHECK-NEXT: uunpklo z31.d, z6.s -; CHECK-NEXT: uunpkhi z8.d, z6.s -; CHECK-NEXT: uunpkhi z29.d, z5.s -; CHECK-NEXT: uunpkhi z11.d, z28.s -; CHECK-NEXT: uunpklo z10.d, z28.s +; CHECK-NEXT: uunpklo z27.d, z0.s +; CHECK-NEXT: uunpklo z31.d, z5.s +; CHECK-NEXT: uunpkhi z8.d, z5.s +; CHECK-NEXT: uunpkhi z30.d, z0.s +; CHECK-NEXT: uunpkhi z11.d, z29.s +; CHECK-NEXT: uunpklo z10.d, z29.s ; CHECK-NEXT: uunpklo z15.s, z3.h ; CHECK-NEXT: uunpklo z16.d, z14.s ; CHECK-NEXT: uunpkhi z14.d, z14.s ; CHECK-NEXT: mov z24.d, #0x8000000000000000 -; CHECK-NEXT: movprfx z1, z7 -; CHECK-NEXT: frintx z1.h, p0/m, z7.h ; CHECK-NEXT: movprfx z5, z27 ; CHECK-NEXT: frintx z5.h, p0/m, z27.h +; CHECK-NEXT: movprfx z1, z6 +; CHECK-NEXT: frintx z1.h, p0/m, z6.h ; CHECK-NEXT: frintx z4.h, p0/m, z4.h ; CHECK-NEXT: movprfx z12, z31 ; CHECK-NEXT: frintx z12.h, p0/m, z31.h ; CHECK-NEXT: movprfx z27, z8 ; CHECK-NEXT: frintx z27.h, p0/m, z8.h -; CHECK-NEXT: movprfx z6, z29 -; CHECK-NEXT: frintx z6.h, p0/m, z29.h +; CHECK-NEXT: movprfx z6, z30 +; CHECK-NEXT: frintx z6.h, p0/m, z30.h ; CHECK-NEXT: movprfx z31, z10 ; CHECK-NEXT: frintx z31.h, p0/m, z10.h -; CHECK-NEXT: mov z7.d, #0x8000000000000000 ; CHECK-NEXT: mov z8.d, #0x8000000000000000 +; CHECK-NEXT: frintx z11.h, p0/m, z11.h ; CHECK-NEXT: movprfx z3, z16 ; CHECK-NEXT: frintx z3.h, p0/m, z16.h -; CHECK-NEXT: frintx z11.h, p0/m, z11.h -; CHECK-NEXT: mov z29.h, w9 +; CHECK-NEXT: mov z30.h, w9 ; CHECK-NEXT: uunpklo z10.d, z13.s ; CHECK-NEXT: uunpkhi z13.d, z13.s ; CHECK-NEXT: uunpkhi z20.d, z15.s @@ -354,124 +348,124 @@ define @llrint_v32i64_v32f16( %x) { ; CHECK-NEXT: uunpklo z15.d, z15.s ; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z21.d, #0x8000000000000000 +; CHECK-NEXT: frintx z10.h, p0/m, z10.h ; CHECK-NEXT: mov z26.d, #0x8000000000000000 -; CHECK-NEXT: mov z28.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z29.d, #0x7fffffffffffffff ; CHECK-NEXT: movprfx z19, z13 ; CHECK-NEXT: frintx z19.h, p0/m, z13.h ; CHECK-NEXT: movprfx z13, z14 ; CHECK-NEXT: frintx z13.h, p0/m, z14.h -; CHECK-NEXT: frintx z10.h, p0/m, z10.h ; CHECK-NEXT: frintx z16.h, p0/m, z16.h ; CHECK-NEXT: mov z22.d, #0x8000000000000000 ; CHECK-NEXT: mov z23.d, #0x8000000000000000 -; CHECK-NEXT: frintx z15.h, p0/m, z15.h ; CHECK-NEXT: mov z14.d, #0x8000000000000000 -; CHECK-NEXT: fcmge p4.h, p0/z, z4.h, z30.h -; CHECK-NEXT: fcmge p2.h, p0/z, z12.h, z30.h -; CHECK-NEXT: fcmgt p9.h, p0/z, z12.h, z29.h +; CHECK-NEXT: frintx z15.h, p0/m, z15.h +; CHECK-NEXT: fcmge p4.h, p0/z, z4.h, z28.h +; CHECK-NEXT: fcmge p2.h, p0/z, z12.h, z28.h +; CHECK-NEXT: fcmgt p9.h, p0/z, z12.h, z30.h ; CHECK-NEXT: fcmuo p8.h, p0/z, z12.h, z12.h ; CHECK-NEXT: fcvtzs z7.d, p4/m, z4.h ; CHECK-NEXT: fcvtzs z8.d, p2/m, z12.h ; CHECK-NEXT: mov z12.d, #0x8000000000000000 -; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z30.h +; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z28.h ; CHECK-NEXT: fcmuo p10.h, p0/z, z11.h, z11.h -; CHECK-NEXT: fcmge p3.h, p0/z, z5.h, z30.h -; CHECK-NEXT: mov z8.d, p9/m, z28.d +; CHECK-NEXT: fcmge p3.h, 
p0/z, z5.h, z28.h +; CHECK-NEXT: mov z8.d, p9/m, z29.d ; CHECK-NEXT: fcvtzs z9.d, p4/m, z27.h -; CHECK-NEXT: fcmge p4.h, p0/z, z11.h, z30.h +; CHECK-NEXT: fcmge p4.h, p0/z, z11.h, z28.h ; CHECK-NEXT: fcvtzs z24.d, p3/m, z5.h ; CHECK-NEXT: mov z8.d, p8/m, #0 // =0x0 -; CHECK-NEXT: fcmge p1.h, p0/z, z6.h, z30.h -; CHECK-NEXT: fcmge p5.h, p0/z, z1.h, z30.h +; CHECK-NEXT: fcmge p1.h, p0/z, z6.h, z28.h +; CHECK-NEXT: fcmge p5.h, p0/z, z1.h, z28.h ; CHECK-NEXT: str z8, [x8, #4, mul vl] ; CHECK-NEXT: fcvtzs z12.d, p4/m, z11.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z29.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z30.h ; CHECK-NEXT: uunpkhi z11.d, z17.s ; CHECK-NEXT: movprfx z17, z20 ; CHECK-NEXT: frintx z17.h, p0/m, z20.h ; CHECK-NEXT: fcvtzs z25.d, p1/m, z6.h ; CHECK-NEXT: mov z20.d, #0x8000000000000000 ; CHECK-NEXT: fcvtzs z0.d, p5/m, z1.h -; CHECK-NEXT: fcmge p6.h, p0/z, z10.h, z30.h +; CHECK-NEXT: fcmge p6.h, p0/z, z10.h, z28.h ; CHECK-NEXT: frintx z11.h, p0/m, z11.h -; CHECK-NEXT: fcmge p3.h, p0/z, z31.h, z30.h -; CHECK-NEXT: fcmge p1.h, p0/z, z13.h, z30.h +; CHECK-NEXT: fcmge p3.h, p0/z, z31.h, z28.h +; CHECK-NEXT: fcmge p1.h, p0/z, z13.h, z28.h ; CHECK-NEXT: fcvtzs z18.d, p6/m, z10.h -; CHECK-NEXT: fcmgt p11.h, p0/z, z10.h, z29.h -; CHECK-NEXT: fcmge p5.h, p0/z, z11.h, z30.h +; CHECK-NEXT: fcmgt p11.h, p0/z, z10.h, z30.h +; CHECK-NEXT: fcmge p5.h, p0/z, z11.h, z28.h ; CHECK-NEXT: fcvtzs z2.d, p3/m, z31.h ; CHECK-NEXT: fcvtzs z21.d, p1/m, z13.h -; CHECK-NEXT: fcmge p2.h, p0/z, z17.h, z30.h -; CHECK-NEXT: fcmge p3.h, p0/z, z16.h, z30.h +; CHECK-NEXT: fcmge p2.h, p0/z, z17.h, z28.h +; CHECK-NEXT: fcmge p3.h, p0/z, z16.h, z28.h ; CHECK-NEXT: fcmuo p1.h, p0/z, z10.h, z10.h -; CHECK-NEXT: sel z10.d, p4, z28.d, z12.d -; CHECK-NEXT: sel z12.d, p11, z28.d, z18.d +; CHECK-NEXT: sel z10.d, p4, z29.d, z12.d +; CHECK-NEXT: sel z12.d, p11, z29.d, z18.d ; CHECK-NEXT: fcvtzs z26.d, p5/m, z11.h ; CHECK-NEXT: fcvtzs z22.d, p2/m, z17.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z29.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z30.h ; CHECK-NEXT: fcvtzs z23.d, p3/m, z16.h ; CHECK-NEXT: mov z10.d, p10/m, #0 // =0x0 ; CHECK-NEXT: mov z12.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmge p6.h, p0/z, z19.h, z30.h +; CHECK-NEXT: fcmge p6.h, p0/z, z19.h, z28.h ; CHECK-NEXT: str z10, [x8, #7, mul vl] -; CHECK-NEXT: fcmge p7.h, p0/z, z3.h, z30.h +; CHECK-NEXT: fcmge p7.h, p0/z, z3.h, z28.h ; CHECK-NEXT: str z12, [x8, #8, mul vl] -; CHECK-NEXT: mov z26.d, p4/m, z28.d -; CHECK-NEXT: fcmge p2.h, p0/z, z15.h, z30.h -; CHECK-NEXT: mov z30.d, #0x8000000000000000 +; CHECK-NEXT: mov z26.d, p4/m, z29.d +; CHECK-NEXT: fcmge p2.h, p0/z, z15.h, z28.h +; CHECK-NEXT: mov z28.d, #0x8000000000000000 ; CHECK-NEXT: fcvtzs z14.d, p6/m, z19.h -; CHECK-NEXT: fcmgt p5.h, p0/z, z16.h, z29.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z17.h, z29.h +; CHECK-NEXT: fcmgt p5.h, p0/z, z16.h, z30.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z17.h, z30.h ; CHECK-NEXT: fcvtzs z20.d, p7/m, z3.h -; CHECK-NEXT: fcvtzs z30.d, p2/m, z15.h +; CHECK-NEXT: fcvtzs z28.d, p2/m, z15.h ; CHECK-NEXT: fcmuo p1.h, p0/z, z11.h, z11.h ; CHECK-NEXT: fcmuo p2.h, p0/z, z16.h, z16.h -; CHECK-NEXT: sel z11.d, p5, z28.d, z23.d -; CHECK-NEXT: sel z16.d, p3, z28.d, z22.d -; CHECK-NEXT: fcmgt p4.h, p0/z, z19.h, z29.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z15.h, z29.h +; CHECK-NEXT: sel z11.d, p5, z29.d, z23.d +; CHECK-NEXT: sel z16.d, p3, z29.d, z22.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z19.h, z30.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z15.h, z30.h ; CHECK-NEXT: mov z26.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z11.d, p2/m, #0 
// =0x0 -; CHECK-NEXT: fcmgt p1.h, p0/z, z13.h, z29.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z13.h, z30.h ; CHECK-NEXT: fcmuo p6.h, p0/z, z17.h, z17.h ; CHECK-NEXT: str z26, [x8, #15, mul vl] -; CHECK-NEXT: sel z26.d, p4, z28.d, z14.d +; CHECK-NEXT: sel z26.d, p4, z29.d, z14.d ; CHECK-NEXT: str z11, [x8, #14, mul vl] -; CHECK-NEXT: mov z30.d, p3/m, z28.d -; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z29.h +; CHECK-NEXT: mov z28.d, p3/m, z29.d +; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z30.h ; CHECK-NEXT: fcmuo p4.h, p0/z, z13.h, z13.h ; CHECK-NEXT: fcmuo p3.h, p0/z, z3.h, z3.h -; CHECK-NEXT: sel z3.d, p1, z28.d, z21.d +; CHECK-NEXT: sel z3.d, p1, z29.d, z21.d ; CHECK-NEXT: mov z16.d, p6/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p12.h, p0/z, z27.h, z29.h -; CHECK-NEXT: sel z11.d, p2, z28.d, z20.d +; CHECK-NEXT: fcmgt p12.h, p0/z, z27.h, z30.h +; CHECK-NEXT: sel z11.d, p2, z29.d, z20.d ; CHECK-NEXT: str z16, [x8, #13, mul vl] ; CHECK-NEXT: mov z3.d, p4/m, #0 // =0x0 ; CHECK-NEXT: fcmuo p6.h, p0/z, z15.h, z15.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z4.h, z29.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z4.h, z30.h ; CHECK-NEXT: mov z11.d, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z9.d, p12/m, z28.d +; CHECK-NEXT: mov z9.d, p12/m, z29.d ; CHECK-NEXT: str z3, [x8, #11, mul vl] ; CHECK-NEXT: fcmuo p5.h, p0/z, z19.h, z19.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z5.h, z29.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z5.h, z30.h ; CHECK-NEXT: str z11, [x8, #10, mul vl] -; CHECK-NEXT: mov z30.d, p6/m, #0 // =0x0 -; CHECK-NEXT: sel z3.d, p1, z28.d, z7.d -; CHECK-NEXT: fcmgt p4.h, p0/z, z6.h, z29.h +; CHECK-NEXT: mov z28.d, p6/m, #0 // =0x0 +; CHECK-NEXT: sel z3.d, p1, z29.d, z7.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z6.h, z30.h ; CHECK-NEXT: fcmuo p3.h, p0/z, z27.h, z27.h -; CHECK-NEXT: str z30, [x8, #12, mul vl] +; CHECK-NEXT: str z28, [x8, #12, mul vl] ; CHECK-NEXT: mov z26.d, p5/m, #0 // =0x0 -; CHECK-NEXT: sel z7.d, p2, z28.d, z24.d -; CHECK-NEXT: fcmgt p6.h, p0/z, z31.h, z29.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z29.h +; CHECK-NEXT: sel z7.d, p2, z29.d, z24.d +; CHECK-NEXT: fcmgt p6.h, p0/z, z31.h, z30.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z30.h ; CHECK-NEXT: str z26, [x8, #9, mul vl] -; CHECK-NEXT: sel z24.d, p4, z28.d, z25.d +; CHECK-NEXT: sel z24.d, p4, z29.d, z25.d ; CHECK-NEXT: mov z9.d, p3/m, #0 // =0x0 ; CHECK-NEXT: fcmuo p5.h, p0/z, z31.h, z31.h ; CHECK-NEXT: fcmuo p2.h, p0/z, z6.h, z6.h -; CHECK-NEXT: mov z2.d, p6/m, z28.d +; CHECK-NEXT: mov z2.d, p6/m, z29.d ; CHECK-NEXT: str z9, [x8, #5, mul vl] -; CHECK-NEXT: mov z0.d, p1/m, z28.d +; CHECK-NEXT: mov z0.d, p1/m, z29.d ; CHECK-NEXT: fcmuo p3.h, p0/z, z5.h, z5.h ; CHECK-NEXT: fcmuo p4.h, p0/z, z4.h, z4.h ; CHECK-NEXT: mov z2.d, p5/m, #0 // =0x0 diff --git a/llvm/test/CodeGen/AArch64/sve-lrint.ll b/llvm/test/CodeGen/AArch64/sve-lrint.ll index f517e7fe8dc16..f1224d30d53cc 100644 --- a/llvm/test/CodeGen/AArch64/sve-lrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-lrint.ll @@ -6,9 +6,8 @@ define @lrint_v1f16( %x) { ; CHECK-LABEL: lrint_v1f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h @@ -29,9 +28,8 @@ define @lrint_v2f16( %x) { ; CHECK-LABEL: lrint_v2f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z2.d, 
#0x8000000000000000 -; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h @@ -53,10 +51,9 @@ define @lrint_v4f16( %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: mov z4.d, #0x8000000000000000 ; CHECK-NEXT: mov z5.d, #0x7fffffffffffffff @@ -93,10 +90,9 @@ define @lrint_v8f16( %x) { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z1.s, z0.h ; CHECK-NEXT: uunpkhi z0.s, z0.h -; CHECK-NEXT: mov w8, #64511 // =0xfbff -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z4.h, w8 ; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z4.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: mov z5.d, #0x8000000000000000 ; CHECK-NEXT: mov z6.d, #0x8000000000000000 ; CHECK-NEXT: mov z7.d, #0x8000000000000000 @@ -163,12 +159,13 @@ define @lrint_v16f16( %x) { ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z2.s, z0.h ; CHECK-NEXT: uunpkhi z3.s, z0.h -; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: uunpklo z7.s, z1.h ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z0.h, #-1025 // =0xfffffffffffffbff ; CHECK-NEXT: uunpkhi z1.s, z1.h -; CHECK-NEXT: mov z0.d, #0x8000000000000000 ; CHECK-NEXT: mov z5.d, #0x8000000000000000 +; CHECK-NEXT: mov z29.h, w8 ; CHECK-NEXT: mov z31.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z4.d, z2.s ; CHECK-NEXT: uunpklo z24.d, z3.s @@ -176,10 +173,8 @@ define @lrint_v16f16( %x) { ; CHECK-NEXT: uunpkhi z6.d, z2.s ; CHECK-NEXT: uunpklo z26.d, z7.s ; CHECK-NEXT: uunpkhi z7.d, z7.s -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z30.d, z1.s -; CHECK-NEXT: mov z29.h, w8 ; CHECK-NEXT: mov z3.d, #0x8000000000000000 ; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: movprfx z27, z4 @@ -192,17 +187,17 @@ define @lrint_v16f16( %x) { ; CHECK-NEXT: frintx z26.h, p0/m, z26.h ; CHECK-NEXT: frintx z7.h, p0/m, z7.h ; CHECK-NEXT: mov z6.d, #0x8000000000000000 -; CHECK-NEXT: fcmge p1.h, p0/z, z27.h, z2.h -; CHECK-NEXT: fcmge p3.h, p0/z, z24.h, z2.h -; CHECK-NEXT: fcmge p4.h, p0/z, z25.h, z2.h -; CHECK-NEXT: fcmge p2.h, p0/z, z28.h, z2.h -; CHECK-NEXT: fcmge p5.h, p0/z, z26.h, z2.h -; CHECK-NEXT: fcvtzs z0.d, p1/m, z27.h +; CHECK-NEXT: fcmge p1.h, p0/z, z27.h, z0.h +; CHECK-NEXT: fcmge p3.h, p0/z, z24.h, z0.h +; CHECK-NEXT: fcmge p4.h, p0/z, z25.h, z0.h +; CHECK-NEXT: fcmge p2.h, p0/z, z28.h, z0.h +; CHECK-NEXT: fcmge p5.h, p0/z, z26.h, z0.h +; CHECK-NEXT: fcvtzs z2.d, p1/m, z27.h ; CHECK-NEXT: fcvtzs z4.d, p3/m, z24.h ; CHECK-NEXT: fcvtzs z5.d, p4/m, z25.h ; CHECK-NEXT: fcmgt p3.h, p0/z, z27.h, z29.h ; CHECK-NEXT: fcvtzs z3.d, p2/m, z28.h -; CHECK-NEXT: fcmge p4.h, p0/z, z7.h, z2.h +; CHECK-NEXT: fcmge p4.h, p0/z, z7.h, z0.h ; CHECK-NEXT: fcvtzs z6.d, p5/m, z26.h ; CHECK-NEXT: fcmuo p1.h, p0/z, z27.h, z27.h ; CHECK-NEXT: movprfx z27, z30 @@ -213,7 +208,7 @@ define @lrint_v16f16( %x) { ; CHECK-NEXT: fcmuo p2.h, p0/z, z28.h, z28.h ; CHECK-NEXT: mov z28.d, #0x8000000000000000 ; CHECK-NEXT: fcvtzs z31.d, p4/m, z7.h -; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z2.h +; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z0.h ; CHECK-NEXT: fcmgt p6.h, p0/z, 
z24.h, z29.h ; CHECK-NEXT: fcmuo p7.h, p0/z, z24.h, z24.h ; CHECK-NEXT: mov z24.d, #0x7fffffffffffffff @@ -222,31 +217,31 @@ define @lrint_v16f16( %x) { ; CHECK-NEXT: fcmuo p10.h, p0/z, z25.h, z25.h ; CHECK-NEXT: mov z25.d, #0x8000000000000000 ; CHECK-NEXT: sel z1.d, p5, z24.d, z3.d -; CHECK-NEXT: mov z0.d, p3/m, z24.d ; CHECK-NEXT: sel z3.d, p8, z24.d, z5.d -; CHECK-NEXT: fcmge p4.h, p0/z, z30.h, z2.h +; CHECK-NEXT: fcmge p4.h, p0/z, z30.h, z0.h +; CHECK-NEXT: sel z0.d, p3, z24.d, z2.d ; CHECK-NEXT: sel z2.d, p6, z24.d, z4.d -; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 ; CHECK-NEXT: mov z3.d, p10/m, #0 // =0x0 ; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Reload +; CHECK-NEXT: fcmgt p9.h, p0/z, z26.h, z29.h ; CHECK-NEXT: mov z2.d, p7/m, #0 // =0x0 ; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Reload -; CHECK-NEXT: fcmgt p9.h, p0/z, z26.h, z29.h ; CHECK-NEXT: fcvtzs z25.d, p4/m, z30.h +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: fcmgt p5.h, p0/z, z7.h, z29.h ; CHECK-NEXT: fcmgt p6.h, p0/z, z27.h, z29.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z30.h, z29.h ; CHECK-NEXT: sel z4.d, p9, z24.d, z6.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z30.h, z29.h ; CHECK-NEXT: fcmuo p8.h, p0/z, z7.h, z7.h ; CHECK-NEXT: sel z5.d, p5, z24.d, z31.d ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload ; CHECK-NEXT: sel z6.d, p6, z24.d, z28.d ; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p9.h, p0/z, z27.h, z27.h +; CHECK-NEXT: fcmuo p3.h, p0/z, z26.h, z26.h ; CHECK-NEXT: sel z7.d, p4, z24.d, z25.d ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload -; CHECK-NEXT: fcmuo p3.h, p0/z, z26.h, z26.h ; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 ; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Reload ; CHECK-NEXT: fcmuo p0.h, p0/z, z30.h, z30.h @@ -303,48 +298,47 @@ define @lrint_v32f16( %x) { ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x40, 0x1c // $d14 @ cfa - 56 * VG - 16 ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x40, 0x1c // $d15 @ cfa - 64 * VG - 16 ; CHECK-NEXT: uunpklo z4.s, z0.h -; CHECK-NEXT: uunpkhi z5.s, z0.h -; CHECK-NEXT: mov w9, #64511 // =0xfbff -; CHECK-NEXT: uunpklo z6.s, z1.h -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpkhi z28.s, z1.h -; CHECK-NEXT: mov z30.h, w9 +; CHECK-NEXT: uunpkhi z0.s, z0.h ; CHECK-NEXT: mov w9, #31743 // =0x7bff +; CHECK-NEXT: uunpklo z5.s, z1.h +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z28.h, #-1025 // =0xfffffffffffffbff +; CHECK-NEXT: uunpkhi z29.s, z1.h +; CHECK-NEXT: mov z7.d, #0x8000000000000000 ; CHECK-NEXT: uunpklo z13.s, z2.h ; CHECK-NEXT: mov z9.d, #0x8000000000000000 ; CHECK-NEXT: uunpkhi z14.s, z2.h ; CHECK-NEXT: uunpkhi z17.s, z3.h -; CHECK-NEXT: uunpklo z7.d, z4.s +; CHECK-NEXT: uunpklo z6.d, z4.s ; CHECK-NEXT: uunpkhi z4.d, z4.s -; CHECK-NEXT: uunpklo z27.d, z5.s -; CHECK-NEXT: uunpklo z31.d, z6.s -; CHECK-NEXT: uunpkhi z8.d, z6.s -; CHECK-NEXT: uunpkhi z29.d, z5.s -; CHECK-NEXT: uunpkhi z11.d, z28.s -; CHECK-NEXT: uunpklo z10.d, z28.s +; CHECK-NEXT: uunpklo z27.d, z0.s +; CHECK-NEXT: uunpklo z31.d, z5.s +; CHECK-NEXT: uunpkhi z8.d, z5.s +; CHECK-NEXT: uunpkhi z30.d, z0.s +; CHECK-NEXT: uunpkhi z11.d, z29.s +; CHECK-NEXT: uunpklo z10.d, z29.s ; CHECK-NEXT: uunpklo z15.s, z3.h ; CHECK-NEXT: uunpklo z16.d, z14.s ; CHECK-NEXT: uunpkhi z14.d, z14.s ; CHECK-NEXT: mov z24.d, #0x8000000000000000 -; CHECK-NEXT: movprfx z1, z7 -; CHECK-NEXT: frintx z1.h, p0/m, z7.h ; CHECK-NEXT: movprfx z5, z27 ; CHECK-NEXT: 
frintx z5.h, p0/m, z27.h +; CHECK-NEXT: movprfx z1, z6 +; CHECK-NEXT: frintx z1.h, p0/m, z6.h ; CHECK-NEXT: frintx z4.h, p0/m, z4.h ; CHECK-NEXT: movprfx z12, z31 ; CHECK-NEXT: frintx z12.h, p0/m, z31.h ; CHECK-NEXT: movprfx z27, z8 ; CHECK-NEXT: frintx z27.h, p0/m, z8.h -; CHECK-NEXT: movprfx z6, z29 -; CHECK-NEXT: frintx z6.h, p0/m, z29.h +; CHECK-NEXT: movprfx z6, z30 +; CHECK-NEXT: frintx z6.h, p0/m, z30.h ; CHECK-NEXT: movprfx z31, z10 ; CHECK-NEXT: frintx z31.h, p0/m, z10.h -; CHECK-NEXT: mov z7.d, #0x8000000000000000 ; CHECK-NEXT: mov z8.d, #0x8000000000000000 +; CHECK-NEXT: frintx z11.h, p0/m, z11.h ; CHECK-NEXT: movprfx z3, z16 ; CHECK-NEXT: frintx z3.h, p0/m, z16.h -; CHECK-NEXT: frintx z11.h, p0/m, z11.h -; CHECK-NEXT: mov z29.h, w9 +; CHECK-NEXT: mov z30.h, w9 ; CHECK-NEXT: uunpklo z10.d, z13.s ; CHECK-NEXT: uunpkhi z13.d, z13.s ; CHECK-NEXT: uunpkhi z20.d, z15.s @@ -355,124 +349,124 @@ define @lrint_v32f16( %x) { ; CHECK-NEXT: uunpklo z15.d, z15.s ; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z21.d, #0x8000000000000000 +; CHECK-NEXT: frintx z10.h, p0/m, z10.h ; CHECK-NEXT: mov z26.d, #0x8000000000000000 -; CHECK-NEXT: mov z28.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z29.d, #0x7fffffffffffffff ; CHECK-NEXT: movprfx z19, z13 ; CHECK-NEXT: frintx z19.h, p0/m, z13.h ; CHECK-NEXT: movprfx z13, z14 ; CHECK-NEXT: frintx z13.h, p0/m, z14.h -; CHECK-NEXT: frintx z10.h, p0/m, z10.h ; CHECK-NEXT: frintx z16.h, p0/m, z16.h ; CHECK-NEXT: mov z22.d, #0x8000000000000000 ; CHECK-NEXT: mov z23.d, #0x8000000000000000 -; CHECK-NEXT: frintx z15.h, p0/m, z15.h ; CHECK-NEXT: mov z14.d, #0x8000000000000000 -; CHECK-NEXT: fcmge p4.h, p0/z, z4.h, z30.h -; CHECK-NEXT: fcmge p2.h, p0/z, z12.h, z30.h -; CHECK-NEXT: fcmgt p9.h, p0/z, z12.h, z29.h +; CHECK-NEXT: frintx z15.h, p0/m, z15.h +; CHECK-NEXT: fcmge p4.h, p0/z, z4.h, z28.h +; CHECK-NEXT: fcmge p2.h, p0/z, z12.h, z28.h +; CHECK-NEXT: fcmgt p9.h, p0/z, z12.h, z30.h ; CHECK-NEXT: fcmuo p8.h, p0/z, z12.h, z12.h ; CHECK-NEXT: fcvtzs z7.d, p4/m, z4.h ; CHECK-NEXT: fcvtzs z8.d, p2/m, z12.h ; CHECK-NEXT: mov z12.d, #0x8000000000000000 -; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z30.h +; CHECK-NEXT: fcmge p4.h, p0/z, z27.h, z28.h ; CHECK-NEXT: fcmuo p10.h, p0/z, z11.h, z11.h -; CHECK-NEXT: fcmge p3.h, p0/z, z5.h, z30.h -; CHECK-NEXT: mov z8.d, p9/m, z28.d +; CHECK-NEXT: fcmge p3.h, p0/z, z5.h, z28.h +; CHECK-NEXT: mov z8.d, p9/m, z29.d ; CHECK-NEXT: fcvtzs z9.d, p4/m, z27.h -; CHECK-NEXT: fcmge p4.h, p0/z, z11.h, z30.h +; CHECK-NEXT: fcmge p4.h, p0/z, z11.h, z28.h ; CHECK-NEXT: fcvtzs z24.d, p3/m, z5.h ; CHECK-NEXT: mov z8.d, p8/m, #0 // =0x0 -; CHECK-NEXT: fcmge p1.h, p0/z, z6.h, z30.h -; CHECK-NEXT: fcmge p5.h, p0/z, z1.h, z30.h +; CHECK-NEXT: fcmge p1.h, p0/z, z6.h, z28.h +; CHECK-NEXT: fcmge p5.h, p0/z, z1.h, z28.h ; CHECK-NEXT: str z8, [x8, #4, mul vl] ; CHECK-NEXT: fcvtzs z12.d, p4/m, z11.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z29.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z30.h ; CHECK-NEXT: uunpkhi z11.d, z17.s ; CHECK-NEXT: movprfx z17, z20 ; CHECK-NEXT: frintx z17.h, p0/m, z20.h ; CHECK-NEXT: fcvtzs z25.d, p1/m, z6.h ; CHECK-NEXT: mov z20.d, #0x8000000000000000 ; CHECK-NEXT: fcvtzs z0.d, p5/m, z1.h -; CHECK-NEXT: fcmge p6.h, p0/z, z10.h, z30.h +; CHECK-NEXT: fcmge p6.h, p0/z, z10.h, z28.h ; CHECK-NEXT: frintx z11.h, p0/m, z11.h -; CHECK-NEXT: fcmge p3.h, p0/z, z31.h, z30.h -; CHECK-NEXT: fcmge p1.h, p0/z, z13.h, z30.h +; CHECK-NEXT: fcmge p3.h, p0/z, z31.h, z28.h +; CHECK-NEXT: fcmge p1.h, p0/z, z13.h, z28.h ; CHECK-NEXT: fcvtzs 
z18.d, p6/m, z10.h -; CHECK-NEXT: fcmgt p11.h, p0/z, z10.h, z29.h -; CHECK-NEXT: fcmge p5.h, p0/z, z11.h, z30.h +; CHECK-NEXT: fcmgt p11.h, p0/z, z10.h, z30.h +; CHECK-NEXT: fcmge p5.h, p0/z, z11.h, z28.h ; CHECK-NEXT: fcvtzs z2.d, p3/m, z31.h ; CHECK-NEXT: fcvtzs z21.d, p1/m, z13.h -; CHECK-NEXT: fcmge p2.h, p0/z, z17.h, z30.h -; CHECK-NEXT: fcmge p3.h, p0/z, z16.h, z30.h +; CHECK-NEXT: fcmge p2.h, p0/z, z17.h, z28.h +; CHECK-NEXT: fcmge p3.h, p0/z, z16.h, z28.h ; CHECK-NEXT: fcmuo p1.h, p0/z, z10.h, z10.h -; CHECK-NEXT: sel z10.d, p4, z28.d, z12.d -; CHECK-NEXT: sel z12.d, p11, z28.d, z18.d +; CHECK-NEXT: sel z10.d, p4, z29.d, z12.d +; CHECK-NEXT: sel z12.d, p11, z29.d, z18.d ; CHECK-NEXT: fcvtzs z26.d, p5/m, z11.h ; CHECK-NEXT: fcvtzs z22.d, p2/m, z17.h -; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z29.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z11.h, z30.h ; CHECK-NEXT: fcvtzs z23.d, p3/m, z16.h ; CHECK-NEXT: mov z10.d, p10/m, #0 // =0x0 ; CHECK-NEXT: mov z12.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmge p6.h, p0/z, z19.h, z30.h +; CHECK-NEXT: fcmge p6.h, p0/z, z19.h, z28.h ; CHECK-NEXT: str z10, [x8, #7, mul vl] -; CHECK-NEXT: fcmge p7.h, p0/z, z3.h, z30.h +; CHECK-NEXT: fcmge p7.h, p0/z, z3.h, z28.h ; CHECK-NEXT: str z12, [x8, #8, mul vl] -; CHECK-NEXT: mov z26.d, p4/m, z28.d -; CHECK-NEXT: fcmge p2.h, p0/z, z15.h, z30.h -; CHECK-NEXT: mov z30.d, #0x8000000000000000 +; CHECK-NEXT: mov z26.d, p4/m, z29.d +; CHECK-NEXT: fcmge p2.h, p0/z, z15.h, z28.h +; CHECK-NEXT: mov z28.d, #0x8000000000000000 ; CHECK-NEXT: fcvtzs z14.d, p6/m, z19.h -; CHECK-NEXT: fcmgt p5.h, p0/z, z16.h, z29.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z17.h, z29.h +; CHECK-NEXT: fcmgt p5.h, p0/z, z16.h, z30.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z17.h, z30.h ; CHECK-NEXT: fcvtzs z20.d, p7/m, z3.h -; CHECK-NEXT: fcvtzs z30.d, p2/m, z15.h +; CHECK-NEXT: fcvtzs z28.d, p2/m, z15.h ; CHECK-NEXT: fcmuo p1.h, p0/z, z11.h, z11.h ; CHECK-NEXT: fcmuo p2.h, p0/z, z16.h, z16.h -; CHECK-NEXT: sel z11.d, p5, z28.d, z23.d -; CHECK-NEXT: sel z16.d, p3, z28.d, z22.d -; CHECK-NEXT: fcmgt p4.h, p0/z, z19.h, z29.h -; CHECK-NEXT: fcmgt p3.h, p0/z, z15.h, z29.h +; CHECK-NEXT: sel z11.d, p5, z29.d, z23.d +; CHECK-NEXT: sel z16.d, p3, z29.d, z22.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z19.h, z30.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z15.h, z30.h ; CHECK-NEXT: mov z26.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z11.d, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p1.h, p0/z, z13.h, z29.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z13.h, z30.h ; CHECK-NEXT: fcmuo p6.h, p0/z, z17.h, z17.h ; CHECK-NEXT: str z26, [x8, #15, mul vl] -; CHECK-NEXT: sel z26.d, p4, z28.d, z14.d +; CHECK-NEXT: sel z26.d, p4, z29.d, z14.d ; CHECK-NEXT: str z11, [x8, #14, mul vl] -; CHECK-NEXT: mov z30.d, p3/m, z28.d -; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z29.h +; CHECK-NEXT: mov z28.d, p3/m, z29.d +; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z30.h ; CHECK-NEXT: fcmuo p4.h, p0/z, z13.h, z13.h ; CHECK-NEXT: fcmuo p3.h, p0/z, z3.h, z3.h -; CHECK-NEXT: sel z3.d, p1, z28.d, z21.d +; CHECK-NEXT: sel z3.d, p1, z29.d, z21.d ; CHECK-NEXT: mov z16.d, p6/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p12.h, p0/z, z27.h, z29.h -; CHECK-NEXT: sel z11.d, p2, z28.d, z20.d +; CHECK-NEXT: fcmgt p12.h, p0/z, z27.h, z30.h +; CHECK-NEXT: sel z11.d, p2, z29.d, z20.d ; CHECK-NEXT: str z16, [x8, #13, mul vl] ; CHECK-NEXT: mov z3.d, p4/m, #0 // =0x0 ; CHECK-NEXT: fcmuo p6.h, p0/z, z15.h, z15.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z4.h, z29.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z4.h, z30.h ; CHECK-NEXT: mov z11.d, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z9.d, p12/m, z28.d +; 
CHECK-NEXT: mov z9.d, p12/m, z29.d ; CHECK-NEXT: str z3, [x8, #11, mul vl] ; CHECK-NEXT: fcmuo p5.h, p0/z, z19.h, z19.h -; CHECK-NEXT: fcmgt p2.h, p0/z, z5.h, z29.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z5.h, z30.h ; CHECK-NEXT: str z11, [x8, #10, mul vl] -; CHECK-NEXT: mov z30.d, p6/m, #0 // =0x0 -; CHECK-NEXT: sel z3.d, p1, z28.d, z7.d -; CHECK-NEXT: fcmgt p4.h, p0/z, z6.h, z29.h +; CHECK-NEXT: mov z28.d, p6/m, #0 // =0x0 +; CHECK-NEXT: sel z3.d, p1, z29.d, z7.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z6.h, z30.h ; CHECK-NEXT: fcmuo p3.h, p0/z, z27.h, z27.h -; CHECK-NEXT: str z30, [x8, #12, mul vl] +; CHECK-NEXT: str z28, [x8, #12, mul vl] ; CHECK-NEXT: mov z26.d, p5/m, #0 // =0x0 -; CHECK-NEXT: sel z7.d, p2, z28.d, z24.d -; CHECK-NEXT: fcmgt p6.h, p0/z, z31.h, z29.h -; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z29.h +; CHECK-NEXT: sel z7.d, p2, z29.d, z24.d +; CHECK-NEXT: fcmgt p6.h, p0/z, z31.h, z30.h +; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z30.h ; CHECK-NEXT: str z26, [x8, #9, mul vl] -; CHECK-NEXT: sel z24.d, p4, z28.d, z25.d +; CHECK-NEXT: sel z24.d, p4, z29.d, z25.d ; CHECK-NEXT: mov z9.d, p3/m, #0 // =0x0 ; CHECK-NEXT: fcmuo p5.h, p0/z, z31.h, z31.h ; CHECK-NEXT: fcmuo p2.h, p0/z, z6.h, z6.h -; CHECK-NEXT: mov z2.d, p6/m, z28.d +; CHECK-NEXT: mov z2.d, p6/m, z29.d ; CHECK-NEXT: str z9, [x8, #5, mul vl] -; CHECK-NEXT: mov z0.d, p1/m, z28.d +; CHECK-NEXT: mov z0.d, p1/m, z29.d ; CHECK-NEXT: fcmuo p3.h, p0/z, z5.h, z5.h ; CHECK-NEXT: fcmuo p4.h, p0/z, z4.h, z4.h ; CHECK-NEXT: mov z2.d, p5/m, #0 // =0x0 diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll index 5cca5539048b5..1ceaa5ad27734 100644 --- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll +++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll @@ -509,6 +509,294 @@ define @splat_nxv2bf16_imm() { ret splat(bfloat 1.0) } +define @splat_nzero_nxv2f16() { +; CHECK-LABEL: splat_nzero_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x8000 +; CHECK-NEXT: ret + ret splat (half -0.0) +} + +define @splat_nzero_nxv4f16() { +; CHECK-LABEL: splat_nzero_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x8000 +; CHECK-NEXT: ret + ret splat (half -0.0) +} + +define @splat_nzero_nxv8f16() { +; CHECK-LABEL: splat_nzero_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x8000 +; CHECK-NEXT: ret + ret splat (half -0.0) +} + +define @splat_nzero_nxv2f32() { +; CHECK-LABEL: splat_nzero_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0x80000000 +; CHECK-NEXT: ret + ret splat (float -0.0) +} + +define @splat_nzero_nxv4f32() { +; CHECK-LABEL: splat_nzero_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0x80000000 +; CHECK-NEXT: ret + ret splat (float -0.0) +} + +define @splat_nzero_nxv2f64() { +; CHECK-LABEL: splat_nzero_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #0x8000000000000000 +; CHECK-NEXT: ret + ret splat (double -0.0) +} + +define @splat_nzero_nxv2bf16() { +; CHECK-LABEL: splat_nzero_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x8000 +; CHECK-NEXT: ret + ret splat (bfloat -0.0) +} + +define @splat_nzero_nxv4bf16() { +; CHECK-LABEL: splat_nzero_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x8000 +; CHECK-NEXT: ret + ret splat (bfloat -0.0) +} + +define @splat_nzero_nxv8bf16() { +; CHECK-LABEL: splat_nzero_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x8000 +; CHECK-NEXT: ret + ret splat (bfloat -0.0) +} + +define @splat_pinf_nxv2f16() { +; CHECK-LABEL: splat_pinf_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x7c00 +; 
CHECK-NEXT: ret + ret splat (half 0x7FF0000000000000) +} + +define @splat_pinf_nxv4f16() { +; CHECK-LABEL: splat_pinf_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x7c00 +; CHECK-NEXT: ret + ret splat (half 0x7FF0000000000000) +} + +define @splat_pinf_nxv8f16() { +; CHECK-LABEL: splat_pinf_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x7c00 +; CHECK-NEXT: ret + ret splat (half 0x7FF0000000000000) +} + +define @splat_pinf_nxv2f32() { +; CHECK-LABEL: splat_pinf_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0x7f800000 +; CHECK-NEXT: ret + ret splat (float 0x7FF0000000000000) +} + +define @splat_pinf_nxv4f32() { +; CHECK-LABEL: splat_pinf_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0x7f800000 +; CHECK-NEXT: ret + ret splat (float 0x7FF0000000000000) +} + +define @splat_pinf_nxv2f64() { +; CHECK-LABEL: splat_pinf_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #0x7ff0000000000000 +; CHECK-NEXT: ret + ret splat (double 0x7FF0000000000000) +} + +define @splat_pinf_nxv2bf16() { +; CHECK-LABEL: splat_pinf_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #32640 // =0x7f80 +; CHECK-NEXT: ret + ret splat (bfloat 0x7FF0000000000000) +} + +define @splat_pinf_nxv4bf16() { +; CHECK-LABEL: splat_pinf_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #32640 // =0x7f80 +; CHECK-NEXT: ret + ret splat (bfloat 0x7FF0000000000000) +} + +define @splat_pinf_nxv8bf16() { +; CHECK-LABEL: splat_pinf_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #32640 // =0x7f80 +; CHECK-NEXT: ret + ret splat (bfloat 0x7FF0000000000000) +} + +define @splat_ninf_nxv2f16() { +; CHECK-LABEL: splat_ninf_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0xfc00 +; CHECK-NEXT: ret + ret splat (half 0xFFF0000000000000) +} + +define @splat_ninf_nxv4f16() { +; CHECK-LABEL: splat_ninf_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0xfc00 +; CHECK-NEXT: ret + ret splat (half 0xFFF0000000000000) +} + +define @splat_ninf_nxv8f16() { +; CHECK-LABEL: splat_ninf_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0xfc00 +; CHECK-NEXT: ret + ret splat (half 0xFFF0000000000000) +} + +define @splat_ninf_nxv2f32() { +; CHECK-LABEL: splat_ninf_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0xff800000 +; CHECK-NEXT: ret + ret splat (float 0xFFF0000000000000) +} + +define @splat_ninf_nxv4f32() { +; CHECK-LABEL: splat_ninf_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0xff800000 +; CHECK-NEXT: ret + ret splat (float 0xFFF0000000000000) +} + +define @splat_ninf_nxv2f64() { +; CHECK-LABEL: splat_ninf_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #0xfff0000000000000 +; CHECK-NEXT: ret + ret splat (double 0xFFF0000000000000) +} + +define @splat_ninf_nxv2bf16() { +; CHECK-LABEL: splat_ninf_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0xff80 +; CHECK-NEXT: ret + ret splat (bfloat 0xFFF0000000000000) +} + +define @splat_ninf_nxv4bf16() { +; CHECK-LABEL: splat_ninf_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0xff80 +; CHECK-NEXT: ret + ret splat (bfloat 0xFFF0000000000000) +} + +define @splat_ninf_nxv8bf16() { +; CHECK-LABEL: splat_ninf_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0xff80 +; CHECK-NEXT: ret + ret splat (bfloat 0xFFF0000000000000) +} + +define @splat_nan_nxv2f16() { +; CHECK-LABEL: splat_nan_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x7e00 +; CHECK-NEXT: ret + ret splat (half 0x7FF8000000000000) +} + +define @splat_nan_nxv4f16() { +; CHECK-LABEL: splat_nan_nxv4f16: +; CHECK: // %bb.0: +; 
CHECK-NEXT: dupm z0.h, #0x7e00 +; CHECK-NEXT: ret + ret splat (half 0x7FF8000000000000) +} + +define @splat_nan_nxv8f16() { +; CHECK-LABEL: splat_nan_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: dupm z0.h, #0x7e00 +; CHECK-NEXT: ret + ret splat (half 0x7FF8000000000000) +} + +define @splat_nan_nxv2f32() { +; CHECK-LABEL: splat_nan_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0x7fc00000 +; CHECK-NEXT: ret + ret splat (float 0x7FF8000000000000) +} + +define @splat_nan_nxv4f32() { +; CHECK-LABEL: splat_nan_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0x7fc00000 +; CHECK-NEXT: ret + ret splat (float 0x7FF8000000000000) +} + +define @splat_nan_nxv2f64() { +; CHECK-LABEL: splat_nan_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #0x7ff8000000000000 +; CHECK-NEXT: ret + ret splat (double 0x7FF8000000000000) +} + +define @splat_nan_nxv2bf16() { +; CHECK-LABEL: splat_nan_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #32704 // =0x7fc0 +; CHECK-NEXT: ret + ret splat (bfloat 0x7FF8000000000000) +} + +define @splat_nan_nxv4bf16() { +; CHECK-LABEL: splat_nan_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #32704 // =0x7fc0 +; CHECK-NEXT: ret + ret splat (bfloat 0x7FF8000000000000) +} + +define @splat_nan_nxv8bf16() { +; CHECK-LABEL: splat_nan_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #32704 // =0x7fc0 +; CHECK-NEXT: ret + ret splat (bfloat 0x7FF8000000000000) +} + define @splat_nxv4i32_fold( %x) { ; CHECK-LABEL: splat_nxv4i32_fold: ; CHECK: // %bb.0: @@ -581,8 +869,8 @@ define @splat_nxv2f64_imm_out_of_range() { ; CHECK-LABEL: splat_nxv2f64_imm_out_of_range: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: adrp x8, .LCPI60_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI60_0 +; CHECK-NEXT: adrp x8, .LCPI96_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI96_0 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret ret splat(double 3.33) diff --git a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll index 6b5b3d6d436cb..b04029c273ae2 100644 --- a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll @@ -338,8 +338,7 @@ ret %sel define @sel_merge_nxv8f16_negative_zero( %p, %in) { ; CHECK-LABEL: sel_merge_nxv8f16_negative_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: dupm z1.h, #0x8000 ; CHECK-NEXT: mov z0.h, p0/m, z1.h ; CHECK-NEXT: ret %sel = select %p, splat (half -0.0), %in @@ -349,8 +348,7 @@ ret %sel define @sel_merge_nx4f16_negative_zero( %p, %in) { ; CHECK-LABEL: sel_merge_nx4f16_negative_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: dupm z1.h, #0x8000 ; CHECK-NEXT: mov z0.s, p0/m, z1.s ; CHECK-NEXT: ret %sel = select %p, splat (half -0.0), %in @@ -360,8 +358,7 @@ ret %sel define @sel_merge_nx2f16_negative_zero( %p, %in) { ; CHECK-LABEL: sel_merge_nx2f16_negative_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: dupm z1.h, #0x8000 ; CHECK-NEXT: mov z0.d, p0/m, z1.d ; CHECK-NEXT: ret %sel = select %p, splat (half -0.0), %in @@ -371,8 +368,7 @@ ret %sel define @sel_merge_nx4f32_negative_zero( %p, %in) { ; CHECK-LABEL: sel_merge_nx4f32_negative_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 -; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov z1.s, #0x80000000 ; CHECK-NEXT: mov z0.s, p0/m, z1.s ; CHECK-NEXT: ret %sel = select %p, splat (float -0.0), %in @@ -382,8 
+378,7 @@ ret %sel define @sel_merge_nx2f32_negative_zero( %p, %in) { ; CHECK-LABEL: sel_merge_nx2f32_negative_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000 -; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov z1.s, #0x80000000 ; CHECK-NEXT: mov z0.d, p0/m, z1.d ; CHECK-NEXT: ret %sel = select %p, splat (float -0.0), %in @@ -393,8 +388,7 @@ ret %sel define @sel_merge_nx2f64_negative_zero( %p, %in) { ; CHECK-LABEL: sel_merge_nx2f64_negative_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 -; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: mov z1.d, #0x8000000000000000 ; CHECK-NEXT: mov z0.d, p0/m, z1.d ; CHECK-NEXT: ret %sel = select %p, splat (double -0.0), %in diff --git a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll index edfd80b4f2706..ace0c83e63c7c 100644 --- a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll @@ -4,20 +4,28 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; AARCH-LABEL: muloti_test: ; AARCH: // %bb.0: // %start +; AARCH-NEXT: orr x8, x1, x3 +; AARCH-NEXT: cbz x8, .LBB0_2 +; AARCH-NEXT: // %bb.1: // %overflow ; AARCH-NEXT: mul x9, x3, x0 ; AARCH-NEXT: cmp x1, #0 ; AARCH-NEXT: ccmp x3, #0, #4, ne -; AARCH-NEXT: umulh x8, x1, x2 -; AARCH-NEXT: umulh x10, x3, x0 +; AARCH-NEXT: umulh x10, x1, x2 +; AARCH-NEXT: umulh x8, x3, x0 ; AARCH-NEXT: madd x9, x1, x2, x9 -; AARCH-NEXT: ccmp xzr, x8, #0, eq -; AARCH-NEXT: umulh x11, x0, x2 ; AARCH-NEXT: ccmp xzr, x10, #0, eq +; AARCH-NEXT: umulh x11, x0, x2 +; AARCH-NEXT: ccmp xzr, x8, #0, eq ; AARCH-NEXT: mul x0, x0, x2 ; AARCH-NEXT: cset w8, ne ; AARCH-NEXT: adds x1, x11, x9 ; AARCH-NEXT: csinc w2, w8, wzr, lo ; AARCH-NEXT: ret +; AARCH-NEXT: .LBB0_2: // %overflow.no +; AARCH-NEXT: umulh x1, x0, x2 +; AARCH-NEXT: mul x0, x0, x2 +; AARCH-NEXT: mov w2, wzr +; AARCH-NEXT: ret start: %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 %1 = extractvalue { i128, i1 } %0, 0 @@ -35,45 +43,56 @@ start: define i128 @__muloti4(i128 %0, i128 %1, ptr nocapture nonnull writeonly align 4 %2) #2 { ; AARCH-LABEL: __muloti4: ; AARCH: // %bb.0: // %Entry -; AARCH-NEXT: asr x11, x1, #63 -; AARCH-NEXT: asr x9, x3, #63 -; AARCH-NEXT: umulh x12, x0, x2 -; AARCH-NEXT: mov x8, x1 +; AARCH-NEXT: eor x8, x3, x2, asr #63 +; AARCH-NEXT: eor x9, x1, x0, asr #63 ; AARCH-NEXT: str wzr, [x4] -; AARCH-NEXT: mul x13, x1, x2 -; AARCH-NEXT: umulh x10, x1, x2 -; AARCH-NEXT: mul x11, x11, x2 -; AARCH-NEXT: adds x12, x13, x12 -; AARCH-NEXT: mul x15, x0, x3 -; AARCH-NEXT: umulh x14, x0, x3 -; AARCH-NEXT: adc x10, x10, x11 -; AARCH-NEXT: mul x9, x0, x9 -; AARCH-NEXT: mul x16, x1, x3 -; AARCH-NEXT: adds x1, x15, x12 -; AARCH-NEXT: asr x12, x10, #63 -; AARCH-NEXT: smulh x11, x8, x3 -; AARCH-NEXT: adc x9, x14, x9 -; AARCH-NEXT: asr x13, x9, #63 -; AARCH-NEXT: adds x9, x10, x9 -; AARCH-NEXT: asr x10, x1, #63 +; AARCH-NEXT: orr x8, x9, x8 +; AARCH-NEXT: cbz x8, .LBB1_2 +; AARCH-NEXT: // %bb.1: // %overflow +; AARCH-NEXT: asr x9, x1, #63 +; AARCH-NEXT: umulh x10, x0, x2 +; AARCH-NEXT: asr x13, x3, #63 +; AARCH-NEXT: mul x11, x1, x2 +; AARCH-NEXT: umulh x8, x1, x2 +; AARCH-NEXT: mul x9, x9, x2 +; AARCH-NEXT: adds x10, x11, x10 +; AARCH-NEXT: mul x14, x0, x3 +; AARCH-NEXT: umulh x12, x0, x3 +; AARCH-NEXT: adc x9, x8, x9 +; AARCH-NEXT: mul x13, x0, x13 +; AARCH-NEXT: adds x8, x14, x10 +; AARCH-NEXT: mul x15, x1, x3 +; 
AARCH-NEXT: smulh x10, x1, x3 +; AARCH-NEXT: adc x11, x12, x13 +; AARCH-NEXT: asr x12, x9, #63 +; AARCH-NEXT: asr x13, x11, #63 +; AARCH-NEXT: adds x9, x9, x11 +; AARCH-NEXT: asr x11, x8, #63 ; AARCH-NEXT: mul x0, x0, x2 ; AARCH-NEXT: adc x12, x12, x13 -; AARCH-NEXT: adds x9, x16, x9 -; AARCH-NEXT: adc x11, x11, x12 -; AARCH-NEXT: cmp x9, x10 -; AARCH-NEXT: ccmp x11, x10, #0, eq +; AARCH-NEXT: adds x9, x15, x9 +; AARCH-NEXT: adc x10, x10, x12 +; AARCH-NEXT: cmp x9, x11 +; AARCH-NEXT: ccmp x10, x11, #0, eq ; AARCH-NEXT: cset w9, ne -; AARCH-NEXT: tbz x8, #63, .LBB1_2 -; AARCH-NEXT: // %bb.1: // %Entry -; AARCH-NEXT: eor x8, x3, #0x8000000000000000 -; AARCH-NEXT: orr x8, x2, x8 -; AARCH-NEXT: cbz x8, .LBB1_3 -; AARCH-NEXT: .LBB1_2: // %Else2 -; AARCH-NEXT: cbz w9, .LBB1_4 -; AARCH-NEXT: .LBB1_3: // %Then7 -; AARCH-NEXT: mov w8, #1 // =0x1 -; AARCH-NEXT: str w8, [x4] -; AARCH-NEXT: .LBB1_4: // %Block9 +; AARCH-NEXT: tbnz x1, #63, .LBB1_3 +; AARCH-NEXT: b .LBB1_4 +; AARCH-NEXT: .LBB1_2: // %overflow.no +; AARCH-NEXT: smulh x8, x0, x2 +; AARCH-NEXT: mov w9, wzr +; AARCH-NEXT: mul x0, x0, x2 +; AARCH-NEXT: tbz x1, #63, .LBB1_4 +; AARCH-NEXT: .LBB1_3: // %overflow.res +; AARCH-NEXT: eor x10, x3, #0x8000000000000000 +; AARCH-NEXT: orr x10, x2, x10 +; AARCH-NEXT: cbz x10, .LBB1_5 +; AARCH-NEXT: .LBB1_4: // %Else2 +; AARCH-NEXT: cbz w9, .LBB1_6 +; AARCH-NEXT: .LBB1_5: // %Then7 +; AARCH-NEXT: mov w9, #1 // =0x1 +; AARCH-NEXT: str w9, [x4] +; AARCH-NEXT: .LBB1_6: // %Block9 +; AARCH-NEXT: mov x1, x8 ; AARCH-NEXT: ret Entry: store i32 0, ptr %2, align 4 diff --git a/llvm/test/CodeGen/DirectX/ddx_coarse-errors.ll b/llvm/test/CodeGen/DirectX/ddx_coarse-errors.ll new file mode 100644 index 0000000000000..0679eec31cec1 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ddx_coarse-errors.ll @@ -0,0 +1,15 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation ddx.coarse does not support double overload type +; CHECK: in function ddx.coarse +; CHECK-SAME: Cannot create DerivCoarseX operation: Invalid overload type + +; Function Attrs: noinline nounwind optnone +define noundef double @ddx.coarse_double(double noundef %a) #0 { +entry: + %a.addr = alloca double, align 8 + store double %a, ptr %a.addr, align 8 + %0 = load double, ptr %a.addr, align 8 + %dx.ddx.coarse = call double @llvm.dx.ddx.coarse.f64(double %0) + ret double %dx.ddx.coarse +} diff --git a/llvm/test/CodeGen/DirectX/ddx_coarse.ll b/llvm/test/CodeGen/DirectX/ddx_coarse.ll new file mode 100644 index 0000000000000..f6ea031273263 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ddx_coarse.ll @@ -0,0 +1,40 @@ +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +; Make sure dxil operation function calls for ddx_coarse are generated for half/float and matching vectors + +define noundef half @deriv_coarse_x_half(half noundef %a) { +; CHECK: call half @dx.op.unary.f16(i32 83, half %{{.*}}) +entry: + %dx.ddx.coarse = call half @llvm.dx.ddx.coarse.f16(half %a) + ret half %dx.ddx.coarse +} + +define noundef float @deriv_coarse_x_float(float noundef %a) { +; CHECK: call float @dx.op.unary.f32(i32 83, float %{{.*}}) +entry: + %dx.ddx.coarse = call float @llvm.dx.ddx.coarse.f32(float %a) + ret float %dx.ddx.coarse +} + +define noundef <4 x float> @deriv_coarse_x_float4(<4 x float> noundef %a) { +; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 +; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee0]]) +; CHECK: [[ee1:%.*]] = 
extractelement <4 x float> %a, i64 1 +; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee1]]) +; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 +; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee2]]) +; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 +; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee3]]) +; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 +; CHECK: ret <4 x float> %{{.*}} +entry: + %dx.ddx.coarse = call <4 x float> @llvm.dx.ddx.coarse.v4f32(<4 x float> %a) + ret <4 x float> %dx.ddx.coarse +} + +declare half @llvm.dx.ddx.coarse.f16(half) +declare float @llvm.dx.ddx.coarse.f32(float) +declare <4 x float> @llvm.dx.ddx.coarse.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/ddy_coarse-errors.ll b/llvm/test/CodeGen/DirectX/ddy_coarse-errors.ll new file mode 100644 index 0000000000000..df8e3eb0f7e0b --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ddy_coarse-errors.ll @@ -0,0 +1,15 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation ddy.coarse does not support double overload type +; CHECK: in function ddy.coarse +; CHECK-SAME: Cannot create DerivCoarseY operation: Invalid overload type + +; Function Attrs: noinline nounwind optnone +define noundef double @ddy.coarse_double(double noundef %a) #0 { +entry: + %a.addr = alloca double, align 8 + store double %a, ptr %a.addr, align 8 + %0 = load double, ptr %a.addr, align 8 + %dx.ddy.coarse = call double @llvm.dx.ddy.coarse.f64(double %0) + ret double %dx.ddy.coarse +} diff --git a/llvm/test/CodeGen/DirectX/ddy_coarse.ll b/llvm/test/CodeGen/DirectX/ddy_coarse.ll new file mode 100644 index 0000000000000..e3337022e1b01 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ddy_coarse.ll @@ -0,0 +1,40 @@ +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +; Make sure dxil operation function calls for ddy_coarse are generated for half/float and matching vectors + +define noundef half @deriv_coarse_y_half(half noundef %a) { +; CHECK: call half @dx.op.unary.f16(i32 84, half %{{.*}}) +entry: + %dx.ddy.coarse = call half @llvm.dx.ddy.coarse.f16(half %a) + ret half %dx.ddy.coarse +} + +define noundef float @deriv_coarse_y_float(float noundef %a) { +; CHECK: call float @dx.op.unary.f32(i32 84, float %{{.*}}) +entry: + %dx.ddy.coarse = call float @llvm.dx.ddy.coarse.f32(float %a) + ret float %dx.ddy.coarse +} + +define noundef <4 x float> @deriv_coarse_y_float4(<4 x float> noundef %a) { +; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 +; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee0]]) +; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 +; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee1]]) +; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 +; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee2]]) +; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 +; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee3]]) +; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 +; CHECK: insertelement <4 x 
float> %{{.*}}, float [[ie3]], i64 3 +; CHECK: ret <4 x float> %{{.*}} +entry: + %dx.ddy.coarse = call <4 x float> @llvm.dx.ddy.coarse.v4f32(<4 x float> %a) + ret <4 x float> %dx.ddy.coarse +} + +declare half @llvm.dx.ddy.coarse.f16(half) +declare float @llvm.dx.ddy.coarse.f32(float) +declare <4 x float> @llvm.dx.ddy.coarse.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddx_coarse.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddx_coarse.ll new file mode 100644 index 0000000000000..478acb53701ea --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddx_coarse.ll @@ -0,0 +1,47 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val --target-env spv1.4 %} + +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 + +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 + +define noundef float @ddx_coarse_float(float noundef %a) { +entry: +; CHECK: %[[#float_32_arg:]] = OpFunctionParameter %[[#float_32]] +; CHECK: %[[#]] = OpDPdxCoarse %[[#float_32]] %[[#float_32_arg]] + %elt.ddx.coarse = call float @llvm.spv.ddx.coarse.f32(float %a) + ret float %elt.ddx.coarse +} + +define noundef half @ddx_coarse_half(half noundef %a) { +entry: +; CHECK: %[[#float_16_arg:]] = OpFunctionParameter %[[#float_16]] +; CHECK: %[[#converted:]] = OpFConvert %[[#float_32:]] %[[#float_16_arg]] +; CHECK: %[[#coarse:]] = OpDPdxCoarse %[[#float_32]] %[[#converted]] +; CHECK: %[[#]] = OpFConvert %[[#float_16]] %[[#coarse]] + %elt.ddx.coarse = call half @llvm.spv.ddx.coarse.f16(half %a) + ret half %elt.ddx.coarse +} + +define noundef <4 x float> @ddx_coarse_float_vector(<4 x float> noundef %a) { +entry: +; CHECK: %[[#vec4_float_32_arg:]] = OpFunctionParameter %[[#vec4_float_32]] +; CHECK: %[[#]] = OpDPdxCoarse %[[#vec4_float_32]] %[[#vec4_float_32_arg]] + %elt.ddx.coarse = call <4 x float> @llvm.spv.ddx.coarse.v4f32(<4 x float> %a) + ret <4 x float> %elt.ddx.coarse +} + +define noundef <4 x half> @ddx_coarse_half_vector(<4 x half> noundef %a) { +entry: +; CHECK: %[[#vec4_float_16_arg:]] = OpFunctionParameter %[[#vec4_float_16]] +; CHECK: %[[#converted:]] = OpFConvert %[[#vec4_float_32:]] %[[#vec4_float_16_arg]] +; CHECK: %[[#coarse:]] = OpDPdxCoarse %[[#vec4_float_32]] %[[#converted]] +; CHECK: %[[#]] = OpFConvert %[[#vec4_float_16]] %[[#coarse]] + %elt.ddx.coarse = call <4 x half> @llvm.spv.ddx.coarse.v4f16(<4 x half> %a) + ret <4 x half> %elt.ddx.coarse +} + +declare float @llvm.spv.ddx.coarse.f32(float) +declare half @llvm.spv.ddx.coarse.f16(half) diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddy_coarse.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddy_coarse.ll new file mode 100644 index 0000000000000..8ad67cb644aa7 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddy_coarse.ll @@ -0,0 +1,47 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val --target-env spv1.4 %} + +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 + +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 + +define noundef float @ddy_coarse_float(float noundef %a) { +entry: +; CHECK: 
%[[#float_32_arg:]] = OpFunctionParameter %[[#float_32]] +; CHECK: %[[#]] = OpDPdyCoarse %[[#float_32]] %[[#float_32_arg]] + %elt.ddy.coarse = call float @llvm.spv.ddy.coarse.f32(float %a) + ret float %elt.ddy.coarse +} + +define noundef half @ddy_coarse_half(half noundef %a) { +entry: +; CHECK: %[[#float_16_arg:]] = OpFunctionParameter %[[#float_16]] +; CHECK: %[[#converted:]] = OpFConvert %[[#float_32:]] %[[#float_16_arg]] +; CHECK: %[[#coarse:]] = OpDPdyCoarse %[[#float_32]] %[[#converted]] +; CHECK: %[[#]] = OpFConvert %[[#float_16]] %[[#coarse]] + %elt.ddy.coarse = call half @llvm.spv.ddy.coarse.f16(half %a) + ret half %elt.ddy.coarse +} + +define noundef <4 x float> @ddy_coarse_float_vector(<4 x float> noundef %a) { +entry: +; CHECK: %[[#vec4_float_32_arg:]] = OpFunctionParameter %[[#vec4_float_32]] +; CHECK: %[[#]] = OpDPdyCoarse %[[#vec4_float_32]] %[[#vec4_float_32_arg]] + %elt.ddy.coarse = call <4 x float> @llvm.spv.ddy.coarse.v4f32(<4 x float> %a) + ret <4 x float> %elt.ddy.coarse +} + +define noundef <4 x half> @ddy_coarse_half_vector(<4 x half> noundef %a) { +entry: +; CHECK: %[[#vec4_float_16_arg:]] = OpFunctionParameter %[[#vec4_float_16]] +; CHECK: %[[#converted:]] = OpFConvert %[[#vec4_float_32:]] %[[#vec4_float_16_arg]] +; CHECK: %[[#coarse:]] = OpDPdyCoarse %[[#vec4_float_32]] %[[#converted]] +; CHECK: %[[#]] = OpFConvert %[[#vec4_float_16]] %[[#coarse]] + %elt.ddy.coarse = call <4 x half> @llvm.spv.ddy.coarse.v4f16(<4 x half> %a) + ret <4 x half> %elt.ddy.coarse +} + +declare float @llvm.spv.ddy.coarse.f32(float) +declare half @llvm.spv.ddy.coarse.f16(half) diff --git a/llvm/test/CodeGen/SPIRV/opencl/ddx_coarse-error.ll b/llvm/test/CodeGen/SPIRV/opencl/ddx_coarse-error.ll new file mode 100644 index 0000000000000..e93c1d1ba4d36 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/opencl/ddx_coarse-error.ll @@ -0,0 +1,12 @@ +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK: LLVM ERROR: %{{.*}} = G_INTRINSIC intrinsic(@llvm.spv.ddx.coarse), %{{.*}} is only supported in shaders. + +define noundef float @ddx_coarse(float noundef %a) { +entry: + %spv.ddx.coarse = call float @llvm.spv.ddx.coarse.f32(float %a) + ret float %spv.ddx.coarse +} + +declare float @llvm.spv.ddx.coarse.f32(float) diff --git a/llvm/test/CodeGen/SPIRV/opencl/ddy_coarse-error.ll b/llvm/test/CodeGen/SPIRV/opencl/ddy_coarse-error.ll new file mode 100644 index 0000000000000..aa71a395d8680 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/opencl/ddy_coarse-error.ll @@ -0,0 +1,12 @@ +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK: LLVM ERROR: %{{.*}} = G_INTRINSIC intrinsic(@llvm.spv.ddy.coarse), %{{.*}} is only supported in shaders. 
+ +define noundef float @ddy_coarse(float noundef %a) { +entry: + %spv.ddy.coarse = call float @llvm.spv.ddy.coarse.f32(float %a) + ret float %spv.ddy.coarse +} + +declare float @llvm.spv.ddy.coarse.f32(float) diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll index b85a20b9d6b6e..023fb5065b892 100644 --- a/llvm/test/CodeGen/X86/bittest-big-integer.ll +++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll @@ -1877,85 +1877,56 @@ define i32 @blsr_u512(ptr %word) nounwind { ; SSE: # %bb.0: ; SSE-NEXT: pushq %r15 ; SSE-NEXT: pushq %r14 -; SSE-NEXT: pushq %r12 ; SSE-NEXT: pushq %rbx -; SSE-NEXT: pushq %rax -; SSE-NEXT: movq 56(%rdi), %rcx -; SSE-NEXT: movq 48(%rdi), %rdx -; SSE-NEXT: movq 40(%rdi), %rsi -; SSE-NEXT: movq 32(%rdi), %r11 +; SSE-NEXT: movq 48(%rdi), %r11 +; SSE-NEXT: movq 40(%rdi), %r9 ; SSE-NEXT: movq 24(%rdi), %r8 -; SSE-NEXT: movq 16(%rdi), %r9 -; SSE-NEXT: movq (%rdi), %rax -; SSE-NEXT: movq 8(%rdi), %r10 -; SSE-NEXT: rep bsfq %rax, %rbx -; SSE-NEXT: rep bsfq %r10, %r14 -; SSE-NEXT: addq $64, %r14 -; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: cmovneq %rbx, %r14 -; SSE-NEXT: rep bsfq %r9, %r15 -; SSE-NEXT: rep bsfq %r8, %rbx +; SSE-NEXT: movq 16(%rdi), %rdx +; SSE-NEXT: movq (%rdi), %rcx +; SSE-NEXT: movq 8(%rdi), %rsi +; SSE-NEXT: rep bsfq %rcx, %rax +; SSE-NEXT: rep bsfq %rsi, %rbx ; SSE-NEXT: addq $64, %rbx -; SSE-NEXT: testq %r9, %r9 -; SSE-NEXT: cmovneq %r15, %rbx -; SSE-NEXT: subq $-128, %rbx -; SSE-NEXT: movq %rax, %r15 -; SSE-NEXT: movq %rax, %r12 -; SSE-NEXT: orq %r10, %r12 -; SSE-NEXT: cmovneq %r14, %rbx -; SSE-NEXT: rep bsfq %r11, %r12 -; SSE-NEXT: rep bsfq %rsi, %r14 -; SSE-NEXT: addq $64, %r14 -; SSE-NEXT: testq %r11, %r11 -; SSE-NEXT: cmovneq %r12, %r14 -; SSE-NEXT: xorps %xmm0, %xmm0 -; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp) -; SSE-NEXT: rep bsfq %rdx, %r12 +; SSE-NEXT: testq %rcx, %rcx +; SSE-NEXT: cmovneq %rax, %rbx +; SSE-NEXT: rep bsfq %rdx, %rax +; SSE-NEXT: rep bsfq %r8, %r10 +; SSE-NEXT: addq $64, %r10 +; SSE-NEXT: testq %rdx, %rdx +; SSE-NEXT: cmovneq %rax, %r10 +; SSE-NEXT: movq 32(%rdi), %r14 +; SSE-NEXT: subq $-128, %r10 +; SSE-NEXT: movq %rcx, %rax +; SSE-NEXT: orq %rsi, %rax +; SSE-NEXT: cmovneq %rbx, %r10 +; SSE-NEXT: rep bsfq %r14, %rax +; SSE-NEXT: rep bsfq %r9, %rbx +; SSE-NEXT: addq $64, %rbx +; SSE-NEXT: testq %r14, %r14 +; SSE-NEXT: cmovneq %rax, %rbx +; SSE-NEXT: rep bsfq %r11, %r15 ; SSE-NEXT: movl $64, %eax -; SSE-NEXT: rep bsfq %rcx, %rax +; SSE-NEXT: rep bsfq 56(%rdi), %rax ; SSE-NEXT: addq $64, %rax -; SSE-NEXT: testq %rdx, %rdx -; SSE-NEXT: cmovneq %r12, %rax +; SSE-NEXT: testq %r11, %r11 +; SSE-NEXT: cmovneq %r15, %rax ; SSE-NEXT: subq $-128, %rax -; SSE-NEXT: movq %r11, -{{[0-9]+}}(%rsp) -; SSE-NEXT: orq %rsi, %r11 -; SSE-NEXT: cmovneq %r14, %rax -; SSE-NEXT: addq $256, %rax # imm = 0x100 -; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp) -; SSE-NEXT: orq %r8, %r10 -; SSE-NEXT: orq %r9, %r15 -; SSE-NEXT: orq %r10, %r15 +; SSE-NEXT: orq %r9, %r14 ; SSE-NEXT: cmovneq %rbx, %rax -; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movq %r9, -{{[0-9]+}}(%rsp) +; SSE-NEXT: addq $256, %rax # imm = 0x100 +; SSE-NEXT: orq %r8, %rsi +; SSE-NEXT: orq %rdx, %rcx +; SSE-NEXT: orq %rsi, %rcx +; SSE-NEXT: 
cmovneq %r10, %rax +; SSE-NEXT: movl $-2, %edx +; SSE-NEXT: movl %eax, %ecx +; SSE-NEXT: roll %cl, %edx ; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: andl $32, %ecx -; SSE-NEXT: movl %eax, %edx -; SSE-NEXT: andl $480, %edx # imm = 0x1E0 -; SSE-NEXT: shrl $3, %edx -; SSE-NEXT: movl %edx, %esi -; SSE-NEXT: andl $-8, %esi -; SSE-NEXT: movq -128(%rsp,%rsi), %r8 -; SSE-NEXT: shrq %cl, %r8 -; SSE-NEXT: movl -120(%rsp,%rsi), %esi -; SSE-NEXT: addl %esi, %esi -; SSE-NEXT: notl %ecx -; SSE-NEXT: # kill: def $cl killed $cl killed $ecx -; SSE-NEXT: shlq %cl, %rsi -; SSE-NEXT: orl %r8d, %esi -; SSE-NEXT: btrl %eax, %esi -; SSE-NEXT: movl %esi, (%rdi,%rdx) +; SSE-NEXT: shrl $3, %ecx +; SSE-NEXT: andl $60, %ecx +; SSE-NEXT: andl %edx, (%rdi,%rcx) ; SSE-NEXT: # kill: def $eax killed $eax killed $rax -; SSE-NEXT: addq $8, %rsp ; SSE-NEXT: popq %rbx -; SSE-NEXT: popq %r12 ; SSE-NEXT: popq %r14 ; SSE-NEXT: popq %r15 ; SSE-NEXT: retq @@ -1964,133 +1935,86 @@ define i32 @blsr_u512(ptr %word) nounwind { ; AVX2: # %bb.0: ; AVX2-NEXT: pushq %r15 ; AVX2-NEXT: pushq %r14 -; AVX2-NEXT: pushq %r13 -; AVX2-NEXT: pushq %r12 ; AVX2-NEXT: pushq %rbx -; AVX2-NEXT: movq 56(%rdi), %rcx -; AVX2-NEXT: movq 40(%rdi), %rdx -; AVX2-NEXT: movq 32(%rdi), %r11 -; AVX2-NEXT: movq 24(%rdi), %rsi -; AVX2-NEXT: movq 16(%rdi), %r8 -; AVX2-NEXT: movq (%rdi), %r9 -; AVX2-NEXT: movq 8(%rdi), %r10 -; AVX2-NEXT: xorl %ebx, %ebx -; AVX2-NEXT: tzcntq %r9, %rbx -; AVX2-NEXT: tzcntq %r10, %rax -; AVX2-NEXT: addq $64, %rax -; AVX2-NEXT: testq %r9, %r9 -; AVX2-NEXT: cmovneq %rbx, %rax -; AVX2-NEXT: xorl %r14d, %r14d -; AVX2-NEXT: tzcntq %r8, %r14 +; AVX2-NEXT: movq 40(%rdi), %r9 +; AVX2-NEXT: movq 32(%rdi), %r10 +; AVX2-NEXT: movq 24(%rdi), %r8 +; AVX2-NEXT: movq 16(%rdi), %rdx +; AVX2-NEXT: movq (%rdi), %rcx +; AVX2-NEXT: movq 8(%rdi), %rsi +; AVX2-NEXT: tzcntq %rcx, %rax ; AVX2-NEXT: xorl %ebx, %ebx ; AVX2-NEXT: tzcntq %rsi, %rbx ; AVX2-NEXT: addq $64, %rbx -; AVX2-NEXT: testq %r8, %r8 -; AVX2-NEXT: cmovneq %r14, %rbx -; AVX2-NEXT: subq $-128, %rbx -; AVX2-NEXT: movq %r9, %r14 -; AVX2-NEXT: movq %r9, %r15 -; AVX2-NEXT: orq %r10, %r15 +; AVX2-NEXT: testq %rcx, %rcx ; AVX2-NEXT: cmovneq %rax, %rbx ; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: tzcntq %r11, %rax -; AVX2-NEXT: xorl %r12d, %r12d -; AVX2-NEXT: tzcntq %rdx, %r12 -; AVX2-NEXT: addq $64, %r12 -; AVX2-NEXT: testq %r11, %r11 -; AVX2-NEXT: cmovneq %rax, %r12 -; AVX2-NEXT: movq 48(%rdi), %r15 -; AVX2-NEXT: xorl %r13d, %r13d -; AVX2-NEXT: tzcntq %r15, %r13 +; AVX2-NEXT: tzcntq %rdx, %rax +; AVX2-NEXT: tzcntq %r8, %r11 +; AVX2-NEXT: addq $64, %r11 +; AVX2-NEXT: testq %rdx, %rdx +; AVX2-NEXT: cmovneq %rax, %r11 +; AVX2-NEXT: subq $-128, %r11 +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: orq %rsi, %rax +; AVX2-NEXT: cmovneq %rbx, %r11 ; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: tzcntq %rcx, %rax +; AVX2-NEXT: tzcntq %r10, %rax +; AVX2-NEXT: xorl %ebx, %ebx +; AVX2-NEXT: tzcntq %r9, %rbx +; AVX2-NEXT: addq $64, %rbx +; AVX2-NEXT: testq %r10, %r10 +; AVX2-NEXT: cmovneq %rax, %rbx +; AVX2-NEXT: movq 48(%rdi), %r14 +; AVX2-NEXT: xorl %r15d, %r15d +; AVX2-NEXT: tzcntq %r14, %r15 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq 56(%rdi), %rax ; AVX2-NEXT: addq $64, %rax -; AVX2-NEXT: testq %r15, %r15 -; AVX2-NEXT: cmovneq %r13, %rax +; AVX2-NEXT: testq %r14, %r14 +; AVX2-NEXT: cmovneq %r15, %rax ; AVX2-NEXT: subq $-128, %rax -; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: movq %r11, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: orq %rdx, %r11 -; AVX2-NEXT: cmovneq %r12, 
%rax -; AVX2-NEXT: addq $256, %rax # imm = 0x100 -; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: orq %rsi, %r10 -; AVX2-NEXT: orq %r8, %r14 -; AVX2-NEXT: orq %r10, %r14 +; AVX2-NEXT: orq %r9, %r10 ; AVX2-NEXT: cmovneq %rbx, %rax -; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: movq %r9, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: movq %r15, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: addq $256, %rax # imm = 0x100 +; AVX2-NEXT: orq %r8, %rsi +; AVX2-NEXT: orq %rdx, %rcx +; AVX2-NEXT: orq %rsi, %rcx +; AVX2-NEXT: cmovneq %r11, %rax +; AVX2-NEXT: movl $-2, %edx +; AVX2-NEXT: movl %eax, %ecx +; AVX2-NEXT: roll %cl, %edx ; AVX2-NEXT: movl %eax, %ecx -; AVX2-NEXT: andl $32, %ecx -; AVX2-NEXT: movl %eax, %edx -; AVX2-NEXT: andl $480, %edx # imm = 0x1E0 -; AVX2-NEXT: shrl $3, %edx -; AVX2-NEXT: movl %edx, %esi -; AVX2-NEXT: andl $-8, %esi -; AVX2-NEXT: shrxq %rcx, -128(%rsp,%rsi), %r8 -; AVX2-NEXT: notl %ecx -; AVX2-NEXT: movl -120(%rsp,%rsi), %esi -; AVX2-NEXT: addl %esi, %esi -; AVX2-NEXT: shlxq %rcx, %rsi, %rcx -; AVX2-NEXT: orl %r8d, %ecx -; AVX2-NEXT: btrl %eax, %ecx -; AVX2-NEXT: movl %ecx, (%rdi,%rdx) +; AVX2-NEXT: shrl $3, %ecx +; AVX2-NEXT: andl $60, %ecx +; AVX2-NEXT: andl %edx, (%rdi,%rcx) ; AVX2-NEXT: # kill: def $eax killed $eax killed $rax ; AVX2-NEXT: popq %rbx -; AVX2-NEXT: popq %r12 -; AVX2-NEXT: popq %r13 ; AVX2-NEXT: popq %r14 ; AVX2-NEXT: popq %r15 -; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: blsr_u512: ; AVX512: # %bb.0: -; AVX512-NEXT: pushq %rax -; AVX512-NEXT: vmovups (%rdi), %ymm0 -; AVX512-NEXT: vmovups 32(%rdi), %ymm1 -; AVX512-NEXT: vmovdqu64 (%rdi), %zmm2 -; AVX512-NEXT: vpternlogd {{.*#+}} zmm3 = -1 -; AVX512-NEXT: vpaddq %zmm3, %zmm2, %zmm3 -; AVX512-NEXT: vpandnq %zmm3, %zmm2, %zmm3 -; AVX512-NEXT: vplzcntq %zmm3, %zmm3 -; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm4 = [64,128,192,256,320,384,448,512] -; AVX512-NEXT: vpsubq %zmm3, %zmm4, %zmm3 -; AVX512-NEXT: vptestmq %zmm2, %zmm2, %k1 -; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm2 = [512,512,512,512,512,512,512,512] -; AVX512-NEXT: vpcompressq %zmm3, %zmm2 {%k1} -; AVX512-NEXT: vmovq %xmm2, %rax -; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vmovdqu %ymm2, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovdqu %ymm2, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 +; AVX512-NEXT: vpternlogd {{.*#+}} zmm1 = -1 +; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm1 +; AVX512-NEXT: vpandnq %zmm1, %zmm0, %zmm1 +; AVX512-NEXT: vplzcntq %zmm1, %zmm1 +; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm2 = [64,128,192,256,320,384,448,512] +; AVX512-NEXT: vpsubq %zmm1, %zmm2, %zmm1 +; AVX512-NEXT: vptestmq %zmm0, %zmm0, %k1 +; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm0 = [512,512,512,512,512,512,512,512] +; AVX512-NEXT: vpcompressq %zmm1, %zmm0 {%k1} +; AVX512-NEXT: vmovq %xmm0, %rax +; AVX512-NEXT: movl $-2, %edx +; AVX512-NEXT: movl %eax, %ecx +; AVX512-NEXT: roll %cl, %edx ; AVX512-NEXT: movl %eax, %ecx -; AVX512-NEXT: andl $32, %ecx -; AVX512-NEXT: movl %ecx, %edx -; AVX512-NEXT: notl %edx -; AVX512-NEXT: movl %eax, %esi -; AVX512-NEXT: shrl $3, %esi -; AVX512-NEXT: movl %esi, %r8d -; AVX512-NEXT: andl $56, %r8d -; AVX512-NEXT: movl -120(%rsp,%r8), %r9d -; AVX512-NEXT: addl %r9d, %r9d -; AVX512-NEXT: shlxq %rdx, %r9, %rdx ; AVX512-NEXT: shrl $3, %ecx -; AVX512-NEXT: addq 
%rsp, %r8 -; AVX512-NEXT: addq $-128, %r8 -; AVX512-NEXT: orl (%rcx,%r8), %edx -; AVX512-NEXT: btrl %eax, %edx -; AVX512-NEXT: andl $60, %esi -; AVX512-NEXT: movl %edx, (%rdi,%rsi) +; AVX512-NEXT: andl $60, %ecx +; AVX512-NEXT: andl %edx, (%rdi,%rcx) ; AVX512-NEXT: # kill: def $eax killed $eax killed $rax -; AVX512-NEXT: popq %rcx ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %ld = load i512, ptr %word diff --git a/llvm/test/Transforms/InstSimplify/AArch64/aarch64-sve-reductions.ll b/llvm/test/Transforms/InstSimplify/AArch64/aarch64-sve-reductions.ll new file mode 100644 index 0000000000000..a54d6044d04b1 --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/AArch64/aarch64-sve-reductions.ll @@ -0,0 +1,912 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes=instsimplify < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; +; ANDV +; + +define i8 @andv_i8_no_active( %a) #0 { +; CHECK-LABEL: define i8 @andv_i8_no_active( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: ret i8 -1 +; + %out = call i8 @llvm.aarch64.sve.andv.nxv16i8( zeroinitializer, %a) + ret i8 %out +} + +define i8 @andv_i8_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @andv_i8_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 -1 +; + %out = call i8 @llvm.aarch64.sve.andv.nxv16i8( %pg, splat(i8 -1)) + ret i8 %out +} + +define i8 @andv_i8_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @andv_i8_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.andv.nxv16i8( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.andv.nxv16i8( %pg, zeroinitializer) + ret i8 %out +} + +define i8 @andv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @andv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 [[A]] +; + %a.insert = insertelement poison, i8 %a, i8 0 + %a.splat = shufflevector %a.insert, poison, zeroinitializer + %out = call i8 @llvm.aarch64.sve.andv.nxv16i8( splat (i1 true), %a.splat) + ret i8 %out +} + +define i16 @andv_i16_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @andv_i16_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 -1 +; + %out = call i16 @llvm.aarch64.sve.andv.nxv8i16( %pg, splat(i16 -1)) + ret i16 %out +} + +define i16 @andv_i16_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @andv_i16_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.andv.nxv8i16( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.andv.nxv8i16( %pg, zeroinitializer) + ret i16 %out +} + +define i32 @andv_i32_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @andv_i32_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 -1 +; + %out = call i32 @llvm.aarch64.sve.andv.nxv4i32( %pg, splat(i32 -1)) + ret i32 %out +} + +define i32 @andv_i32_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @andv_i32_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.andv.nxv4i32( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.andv.nxv4i32( %pg, zeroinitializer) + ret i32 %out +} + +define i64 @andv_i64_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 
@andv_i64_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 -1 +; + %out = call i64 @llvm.aarch64.sve.andv.nxv2i64( %pg, splat(i64 -1)) + ret i64 %out +} + +define i64 @andv_i64_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @andv_i64_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.andv.nxv2i64( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.andv.nxv2i64( %pg, zeroinitializer) + ret i64 %out +} + +; +; EORV +; + +define i8 @eorv_i8_no_active( %a) #0 { +; CHECK-LABEL: define i8 @eorv_i8_no_active( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8( zeroinitializer, %a) + ret i8 %out +} + +define i8 @eorv_i8_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @eorv_i8_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8( %pg, zeroinitializer) + ret i8 %out +} + +define i8 @eorv_i8_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @eorv_i8_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.eorv.nxv16i8( [[PG]], splat (i8 1)) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8( %pg, splat(i8 1)) + ret i8 %out +} + +define i8 @eorv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @eorv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %a.insert = insertelement poison, i8 %a, i8 0 + %a.splat = shufflevector %a.insert, poison, zeroinitializer + %out = call i8 @llvm.aarch64.sve.eorv.nxv16i8( splat (i1 true), %a.splat) + ret i8 %out +} + +define i16 @eorv_i16_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @eorv_i16_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 0 +; + %out = call i16 @llvm.aarch64.sve.eorv.nxv8i16( %pg, zeroinitializer) + ret i16 %out +} + +define i16 @eorv_i16_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @eorv_i16_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.eorv.nxv8i16( [[PG]], splat (i16 1)) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.eorv.nxv8i16( %pg, splat(i16 1)) + ret i16 %out +} + +define i32 @eorv_i32_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @eorv_i32_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 0 +; + %out = call i32 @llvm.aarch64.sve.eorv.nxv4i32( %pg, zeroinitializer) + ret i32 %out +} + +define i32 @eorv_i32_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @eorv_i32_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.eorv.nxv4i32( [[PG]], splat (i32 1)) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.eorv.nxv4i32( %pg, splat(i32 1)) + ret i32 %out +} + +define i64 @eorv_i64_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @eorv_i64_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.eorv.nxv2i64( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @eorv_i64_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @eorv_i64_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 
@llvm.aarch64.sve.eorv.nxv2i64( [[PG]], splat (i64 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.eorv.nxv2i64( %pg, splat(i64 1)) + ret i64 %out +} + +; +; ORV +; + +define i8 @orv_i8_no_active( %a) #0 { +; CHECK-LABEL: define i8 @orv_i8_no_active( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %out = call i8 @llvm.aarch64.sve.orv.nxv16i8( zeroinitializer, %a) + ret i8 %out +} + +define i8 @orv_i8_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @orv_i8_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %out = call i8 @llvm.aarch64.sve.orv.nxv16i8( %pg, zeroinitializer) + ret i8 %out +} + +define i8 @orv_i8_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @orv_i8_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.orv.nxv16i8( [[PG]], splat (i8 1)) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.orv.nxv16i8( %pg, splat(i8 1)) + ret i8 %out +} + +define i8 @orv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @orv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 [[A]] +; + %a.insert = insertelement poison, i8 %a, i8 0 + %a.splat = shufflevector %a.insert, poison, zeroinitializer + %out = call i8 @llvm.aarch64.sve.orv.nxv16i8( splat (i1 true), %a.splat) + ret i8 %out +} + +define i16 @orv_i16_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @orv_i16_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 0 +; + %out = call i16 @llvm.aarch64.sve.orv.nxv8i16( %pg, zeroinitializer) + ret i16 %out +} + +define i16 @orv_i16_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @orv_i16_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.orv.nxv8i16( [[PG]], splat (i16 1)) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.orv.nxv8i16( %pg, splat(i16 1)) + ret i16 %out +} + +define i32 @orv_i32_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @orv_i32_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 0 +; + %out = call i32 @llvm.aarch64.sve.orv.nxv4i32( %pg, zeroinitializer) + ret i32 %out +} + +define i32 @orv_i32_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @orv_i32_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.orv.nxv4i32( [[PG]], splat (i32 1)) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.orv.nxv4i32( %pg, splat(i32 1)) + ret i32 %out +} + +define i64 @orv_i64_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @orv_i64_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.orv.nxv2i64( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @orv_i64_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @orv_i64_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.orv.nxv2i64( [[PG]], splat (i64 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.orv.nxv2i64( %pg, splat(i64 1)) + ret i64 %out +} + +; +; SADDV +; + +define i64 @saddv_i8_no_active( %a) #0 { +; CHECK-LABEL: define i64 @saddv_i8_no_active( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8( 
zeroinitializer, %a) + ret i64 %out +} + +define i64 @saddv_i8_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i8_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @saddv_i8_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i8_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv16i8( [[PG]], splat (i8 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8( %pg, splat(i8 1)) + ret i64 %out +} + +define i64 @saddv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i64 @saddv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[A_INSERT:%.*]] = insertelement poison, i8 [[A]], i8 0 +; CHECK-NEXT: [[A_SPLAT:%.*]] = shufflevector [[A_INSERT]], poison, zeroinitializer +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv16i8( splat (i1 true), [[A_SPLAT]]) +; CHECK-NEXT: ret i64 [[OUT]] +; + %a.insert = insertelement poison, i8 %a, i8 0 + %a.splat = shufflevector %a.insert, poison, zeroinitializer + %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8( splat (i1 true), %a.splat) + ret i64 %out +} + +define i64 @saddv_i16_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i16_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv8i16( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @saddv_i16_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i16_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv8i16( [[PG]], splat (i16 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv8i16( %pg, splat(i16 1)) + ret i64 %out +} + +define i64 @saddv_i32_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i32_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv4i32( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @saddv_i32_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i32_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv4i32( [[PG]], splat (i32 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv4i32( %pg, splat(i32 1)) + ret i64 %out +} + +define i64 @saddv_i64_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i64_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv2i64( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @saddv_i64_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @saddv_i64_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.saddv.nxv2i64( [[PG]], splat (i64 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.saddv.nxv2i64( %pg, splat(i64 1)) + ret i64 %out +} + +; +; SMAXV +; + +define i8 @smaxv_i8_no_active( %a) #0 { +; CHECK-LABEL: define i8 @smaxv_i8_no_active( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 -128 +; + %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8( zeroinitializer, %a) + ret i8 %out +} + +define i8 
@smaxv_i8_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @smaxv_i8_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 -128 +; + %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8( %pg, splat(i8 -128)) + ret i8 %out +} + +define i8 @smaxv_i8_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @smaxv_i8_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.smaxv.nxv16i8( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8( %pg, zeroinitializer) + ret i8 %out +} + +define i8 @smaxv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @smaxv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 [[A]] +; + %a.insert = insertelement poison, i8 %a, i8 0 + %a.splat = shufflevector %a.insert, poison, zeroinitializer + %out = call i8 @llvm.aarch64.sve.smaxv.nxv16i8( splat (i1 true), %a.splat) + ret i8 %out +} + +define i16 @smaxv_i16_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @smaxv_i16_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 -32768 +; + %out = call i16 @llvm.aarch64.sve.smaxv.nxv8i16( %pg, splat(i16 -32768)) + ret i16 %out +} + +define i16 @smaxv_i16_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @smaxv_i16_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.smaxv.nxv8i16( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.smaxv.nxv8i16( %pg, zeroinitializer) + ret i16 %out +} + +define i32 @smaxv_i32_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @smaxv_i32_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 -2147483648 +; + %out = call i32 @llvm.aarch64.sve.smaxv.nxv4i32( %pg, splat(i32 -2147483648)) + ret i32 %out +} + +define i32 @smaxv_i32_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @smaxv_i32_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.smaxv.nxv4i32( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.smaxv.nxv4i32( %pg, zeroinitializer) + ret i32 %out +} + +define i64 @smaxv_i64_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @smaxv_i64_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 -9223372036854775808 +; + %out = call i64 @llvm.aarch64.sve.smaxv.nxv2i64( %pg, splat(i64 -9223372036854775808)) + ret i64 %out +} + +define i64 @smaxv_i64_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @smaxv_i64_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.smaxv.nxv2i64( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.smaxv.nxv2i64( %pg, zeroinitializer) + ret i64 %out +} + +; +; SMINV +; + +define i8 @sminv_i8_no_active( %a) #0 { +; CHECK-LABEL: define i8 @sminv_i8_no_active( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 127 +; + %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8( zeroinitializer, %a) + ret i8 %out +} + +define i8 @sminv_i8_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @sminv_i8_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 127 +; + %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8( %pg, splat(i8 127)) + ret i8 %out +} + +define i8 
@sminv_i8_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @sminv_i8_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.sminv.nxv16i8( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8( %pg, zeroinitializer) + ret i8 %out +} + +define i8 @sminv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @sminv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 [[A]] +; + %a.insert = insertelement poison, i8 %a, i8 0 + %a.splat = shufflevector %a.insert, poison, zeroinitializer + %out = call i8 @llvm.aarch64.sve.sminv.nxv16i8( splat (i1 true), %a.splat) + ret i8 %out +} + +define i16 @sminv_i16_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @sminv_i16_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 32767 +; + %out = call i16 @llvm.aarch64.sve.sminv.nxv8i16( %pg, splat(i16 32767)) + ret i16 %out +} + +define i16 @sminv_i16_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @sminv_i16_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.sminv.nxv8i16( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.sminv.nxv8i16( %pg, zeroinitializer) + ret i16 %out +} + +define i32 @sminv_i32_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @sminv_i32_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 2147483647 +; + %out = call i32 @llvm.aarch64.sve.sminv.nxv4i32( %pg, splat(i32 2147483647)) + ret i32 %out +} + +define i32 @sminv_i32_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @sminv_i32_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.sminv.nxv4i32( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.sminv.nxv4i32( %pg, zeroinitializer) + ret i32 %out +} + +define i64 @sminv_i64_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @sminv_i64_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 9223372036854775807 +; + %out = call i64 @llvm.aarch64.sve.sminv.nxv2i64( %pg, splat(i64 9223372036854775807)) + ret i64 %out +} + +define i64 @sminv_i64_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @sminv_i64_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.sminv.nxv2i64( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.sminv.nxv2i64( %pg, zeroinitializer) + ret i64 %out +} + +; +; UADDV +; + +define i64 @uaddv_i8_no_active( %a) #0 { +; CHECK-LABEL: define i64 @uaddv_i8_no_active( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8( zeroinitializer, %a) + ret i64 %out +} + +define i64 @uaddv_i8_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i8_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @uaddv_i8_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i8_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv16i8( [[PG]], splat (i8 1)) +; CHECK-NEXT: ret i64 
[[OUT]] +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8( %pg, splat(i8 1)) + ret i64 %out +} + +define i64 @uaddv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i64 @uaddv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[A_INSERT:%.*]] = insertelement poison, i8 [[A]], i8 0 +; CHECK-NEXT: [[A_SPLAT:%.*]] = shufflevector [[A_INSERT]], poison, zeroinitializer +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv16i8( splat (i1 true), [[A_SPLAT]]) +; CHECK-NEXT: ret i64 [[OUT]] +; + %a.insert = insertelement poison, i8 %a, i8 0 + %a.splat = shufflevector %a.insert, poison, zeroinitializer + %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8( splat (i1 true), %a.splat) + ret i64 %out +} + +define i64 @uaddv_i16_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i16_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv8i16( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @uaddv_i16_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i16_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv8i16( [[PG]], splat (i16 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv8i16( %pg, splat(i16 1)) + ret i64 %out +} + +define i64 @uaddv_i32_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i32_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv4i32( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @uaddv_i32_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i32_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv4i32( [[PG]], splat (i32 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv4i32( %pg, splat(i32 1)) + ret i64 %out +} + +define i64 @uaddv_i64_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i64_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv2i64( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @uaddv_i64_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uaddv_i64_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uaddv.nxv2i64( [[PG]], splat (i64 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.uaddv.nxv2i64( %pg, splat(i64 1)) + ret i64 %out +} + +; +; UMAXV +; + +define i8 @umaxv_i8_no_active( %a) #0 { +; CHECK-LABEL: define i8 @umaxv_i8_no_active( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8( zeroinitializer, %a) + ret i8 %out +} + +define i8 @umaxv_i8_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @umaxv_i8_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 0 +; + %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8( %pg, zeroinitializer) + ret i8 %out +} + +define i8 @umaxv_i8_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @umaxv_i8_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.umaxv.nxv16i8( [[PG]], splat (i8 1)) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8( %pg, splat(i8 1)) + 
ret i8 %out +} + +define i8 @umaxv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @umaxv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 [[A]] +; + %a.insert = insertelement poison, i8 %a, i8 0 + %a.splat = shufflevector %a.insert, poison, zeroinitializer + %out = call i8 @llvm.aarch64.sve.umaxv.nxv16i8( splat (i1 true), %a.splat) + ret i8 %out +} + +define i16 @umaxv_i16_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @umaxv_i16_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 0 +; + %out = call i16 @llvm.aarch64.sve.umaxv.nxv8i16( %pg, zeroinitializer) + ret i16 %out +} + +define i16 @umaxv_i16_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @umaxv_i16_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.umaxv.nxv8i16( [[PG]], splat (i16 1)) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.umaxv.nxv8i16( %pg, splat(i16 1)) + ret i16 %out +} + +define i32 @umaxv_i32_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @umaxv_i32_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 0 +; + %out = call i32 @llvm.aarch64.sve.umaxv.nxv4i32( %pg, zeroinitializer) + ret i32 %out +} + +define i32 @umaxv_i32_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @umaxv_i32_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.umaxv.nxv4i32( [[PG]], splat (i32 1)) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.umaxv.nxv4i32( %pg, splat(i32 1)) + ret i32 %out +} + +define i64 @umaxv_i64_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @umaxv_i64_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 0 +; + %out = call i64 @llvm.aarch64.sve.umaxv.nxv2i64( %pg, zeroinitializer) + ret i64 %out +} + +define i64 @umaxv_i64_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @umaxv_i64_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.umaxv.nxv2i64( [[PG]], splat (i64 1)) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.umaxv.nxv2i64( %pg, splat(i64 1)) + ret i64 %out +} + +; +; UMINV +; + +define i8 @uminv_i8_no_active( %a) #0 { +; CHECK-LABEL: define i8 @uminv_i8_no_active( +; CHECK-SAME: [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 -1 +; + %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8( zeroinitializer, %a) + ret i8 %out +} + +define i8 @uminv_i8_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @uminv_i8_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 -1 +; + %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8( %pg, splat(i8 -1)) + ret i8 %out +} + +define i8 @uminv_i8_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i8 @uminv_i8_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i8 @llvm.aarch64.sve.uminv.nxv16i8( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i8 [[OUT]] +; + %out = call i8 @llvm.aarch64.sve.uminv.nxv16i8( %pg, zeroinitializer) + ret i8 %out +} + +define i8 @uminv_i8_all_active_splat(i8 %a) #0 { +; CHECK-LABEL: define i8 @uminv_i8_all_active_splat( +; CHECK-SAME: i8 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i8 [[A]] +; + %a.insert = insertelement poison, i8 %a, i8 0 + %a.splat = shufflevector %a.insert, poison, zeroinitializer + %out = call i8 
@llvm.aarch64.sve.uminv.nxv16i8( splat (i1 true), %a.splat) + ret i8 %out +} + +define i16 @uminv_i16_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @uminv_i16_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i16 -1 +; + %out = call i16 @llvm.aarch64.sve.uminv.nxv8i16( %pg, splat(i16 -1)) + ret i16 %out +} + +define i16 @uminv_i16_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i16 @uminv_i16_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i16 @llvm.aarch64.sve.uminv.nxv8i16( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i16 [[OUT]] +; + %out = call i16 @llvm.aarch64.sve.uminv.nxv8i16( %pg, zeroinitializer) + ret i16 %out +} + +define i32 @uminv_i32_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @uminv_i32_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i32 -1 +; + %out = call i32 @llvm.aarch64.sve.uminv.nxv4i32( %pg, splat(i32 -1)) + ret i32 %out +} + +define i32 @uminv_i32_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i32 @uminv_i32_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i32 @llvm.aarch64.sve.uminv.nxv4i32( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i32 [[OUT]] +; + %out = call i32 @llvm.aarch64.sve.uminv.nxv4i32( %pg, zeroinitializer) + ret i32 %out +} + +define i64 @uminv_i64_splat_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uminv_i64_splat_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: ret i64 -1 +; + %out = call i64 @llvm.aarch64.sve.uminv.nxv2i64( %pg, splat(i64 -1)) + ret i64 %out +} + +define i64 @uminv_i64_splat_non_neutral_val( %pg) #0 { +; CHECK-LABEL: define i64 @uminv_i64_splat_non_neutral_val( +; CHECK-SAME: [[PG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OUT:%.*]] = call i64 @llvm.aarch64.sve.uminv.nxv2i64( [[PG]], zeroinitializer) +; CHECK-NEXT: ret i64 [[OUT]] +; + %out = call i64 @llvm.aarch64.sve.uminv.nxv2i64( %pg, zeroinitializer) + ret i64 %out +} + +attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/Transforms/InstSimplify/AArch64/lit.local.cfg b/llvm/test/Transforms/InstSimplify/AArch64/lit.local.cfg new file mode 100644 index 0000000000000..10d4a0e953ed4 --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/AArch64/lit.local.cfg @@ -0,0 +1,2 @@ +if not "AArch64" in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll index 20676f3702294..10c265519952b 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -14,23 +14,23 @@ define void @foo(i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: outer.header: ; CHECK-NEXT: EMIT-SCALAR ir<%outer.iv> = phi [ ir<%outer.iv.next>, outer.latch ], [ ir<0>, ir-bb ] -; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv> +; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr inbounds ir<@arr2>, ir<0>, ir<%outer.iv> ; CHECK-NEXT: EMIT store ir<%outer.iv>, ir<%gep.1> -; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv>, ir<%n> +; CHECK-NEXT: EMIT ir<%add> = add nsw ir<%outer.iv>, ir<%n> ; CHECK-NEXT: Successor(s): inner ; CHECK-EMPTY: ; CHECK-NEXT: inner: ; CHECK-NEXT: EMIT-SCALAR ir<%inner.iv> = phi [ ir<%inner.iv.next>, inner ], [ ir<0>, outer.header ] -; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv> +; CHECK-NEXT: EMIT 
ir<%gep.2> = getelementptr inbounds ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv> ; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2> -; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1> -; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8> +; CHECK-NEXT: EMIT ir<%inner.iv.next> = add nuw nsw ir<%inner.iv>, ir<1> +; CHECK-NEXT: EMIT ir<%inner.ec> = icmp eq ir<%inner.iv.next>, ir<8> ; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec> ; CHECK-NEXT: Successor(s): outer.latch, inner ; CHECK-EMPTY: ; CHECK-NEXT: outer.latch: -; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1> -; CHECK-NEXT: EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8> +; CHECK-NEXT: EMIT ir<%outer.iv.next> = add nuw nsw ir<%outer.iv>, ir<1> +; CHECK-NEXT: EMIT ir<%outer.ec> = icmp eq ir<%outer.iv.next>, ir<8> ; CHECK-NEXT: EMIT branch-on-cond ir<%outer.ec> ; CHECK-NEXT: Successor(s): ir-bb, outer.header ; CHECK-EMPTY: diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp index b99d656c5c50f..5742df2aa3c53 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -139,12 +139,12 @@ compound=true "vector.body:\l" + " EMIT vp\<%2\> = CANONICAL-INDUCTION ir\<0\>, vp\<%index.next\>\l" + " EMIT-SCALAR ir\<%indvars.iv\> = phi [ ir\<0\>, vector.ph ], [ ir\<%indvars.iv.next\>, vector.body ]\l" + - " EMIT ir\<%arr.idx\> = getelementptr ir\<%A\>, ir\<%indvars.iv\>\l" + + " EMIT ir\<%arr.idx\> = getelementptr inbounds ir\<%A\>, ir\<%indvars.iv\>\l" + " EMIT ir\<%l1\> = load ir\<%arr.idx\>\l" + " EMIT ir\<%res\> = add ir\<%l1\>, ir\<10\>\l" + " EMIT store ir\<%res\>, ir\<%arr.idx\>\l" + " EMIT ir\<%indvars.iv.next\> = add ir\<%indvars.iv\>, ir\<1\>\l" + - " EMIT ir\<%exitcond\> = icmp ir\<%indvars.iv.next\>, ir\<%N\>\l" + + " EMIT ir\<%exitcond\> = icmp ne ir\<%indvars.iv.next\>, ir\<%N\>\l" + " EMIT vp\<%3\> = not ir\<%exitcond\>\l" + " EMIT vp\<%index.next\> = add nuw vp\<%2\>, vp\<%0\>\l" + " EMIT branch-on-count vp\<%index.next\>, vp\<%1\>\l" + @@ -305,9 +305,9 @@ compound=true "vector.body:\l" + " EMIT vp\<%2\> = CANONICAL-INDUCTION ir\<0\>, vp\<%index.next\>\l" + " EMIT-SCALAR ir\<%iv\> = phi [ ir\<0\>, vector.ph ], [ ir\<%iv.next\>, loop.latch ]\l" + - " EMIT ir\<%arr.idx\> = getelementptr ir\<%A\>, ir\<%iv\>\l" + + " EMIT ir\<%arr.idx\> = getelementptr inbounds ir\<%A\>, ir\<%iv\>\l" + " EMIT ir\<%l1\> = load ir\<%arr.idx\>\l" + - " EMIT ir\<%c\> = icmp ir\<%l1\>, ir\<0\>\l" + + " EMIT ir\<%c\> = icmp eq ir\<%l1\>, ir\<0\>\l" + "Successor(s): loop.latch\l" ] N4 -> N6 [ label=""] @@ -316,7 +316,7 @@ compound=true " EMIT ir\<%res\> = add ir\<%l1\>, ir\<10\>\l" + " EMIT store ir\<%res\>, ir\<%arr.idx\>\l" + " EMIT ir\<%iv.next\> = add ir\<%iv\>, ir\<1\>\l" + - " EMIT ir\<%exitcond\> = icmp ir\<%iv.next\>, ir\<%N\>\l" + + " EMIT ir\<%exitcond\> = icmp ne ir\<%iv.next\>, ir\<%N\>\l" + " EMIT vp\<%3\> = not ir\<%exitcond\>\l" + " EMIT vp\<%index.next\> = add nuw vp\<%2\>, vp\<%0\>\l" + " EMIT branch-on-count vp\<%index.next\>, vp\<%1\>\l" + diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 3842ba235ead3..63776b78a2088 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -1009,7 +1009,7 @@ TEST_F(VPRecipeTest, CastVPWidenRecipeToVPUser) { SmallVector Args; Args.push_back(Op1); Args.push_back(Op2); - VPWidenRecipe WidenR(*AI, 
Args, VPIRMetadata(), DebugLoc()); + VPWidenRecipe WidenR(*AI, Args); checkVPRecipeCastImpl(&WidenR); delete AI; @@ -1053,7 +1053,7 @@ TEST_F(VPRecipeTest, CastVPWidenSelectRecipeToVPUserAndVPDef) { Args.push_back(Op1); Args.push_back(Op2); Args.push_back(Op3); - VPWidenSelectRecipe WidenSelectR(*SelectI, + VPWidenSelectRecipe WidenSelectR(SelectI, make_range(Args.begin(), Args.end())); checkVPRecipeCastImpl( @@ -1093,7 +1093,7 @@ TEST_F(VPRecipeTest, CastVPWidenCastRecipeToVPUser) { IntegerType *Int64 = IntegerType::get(C, 64); auto *Cast = CastInst::CreateZExtOrBitCast(PoisonValue::get(Int32), Int64); VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); - VPWidenCastRecipe Recipe(Instruction::ZExt, Op1, Int64, *Cast, {}); + VPWidenCastRecipe Recipe(Instruction::ZExt, Op1, Int64, Cast); checkVPRecipeCastImpl(&Recipe); delete Cast; @@ -1264,7 +1264,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { SmallVector Args; Args.push_back(Op1); Args.push_back(Op2); - VPWidenRecipe Recipe(*AI, Args, VPIRMetadata(), DebugLoc()); + VPWidenRecipe Recipe(*AI, Args); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1283,7 +1283,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { Args.push_back(Op1); Args.push_back(Op2); Args.push_back(Op3); - VPWidenSelectRecipe Recipe(*SelectI, make_range(Args.begin(), Args.end())); + VPWidenSelectRecipe Recipe(SelectI, make_range(Args.begin(), Args.end())); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1412,7 +1412,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { Args.push_back(Op1); Args.push_back(Op2); Args.push_back(CalledFn); - VPWidenCallRecipe Recipe(Call, TheFn, Args); + VPWidenCallRecipe Recipe(Call, TheFn, Args, VPIRFlags(), VPIRMetadata()); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1468,8 +1468,7 @@ TEST_F(VPRecipeTest, dumpRecipeInPlan) { VPValue *ExtVPV2 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); Args.push_back(ExtVPV1); Args.push_back(ExtVPV2); - VPWidenRecipe *WidenR = - new VPWidenRecipe(*AI, Args, VPIRMetadata(), DebugLoc()); + VPWidenRecipe *WidenR = new VPWidenRecipe(*AI, Args); VPBB1->appendRecipe(WidenR); { diff --git a/llvm/utils/TableGen/Basic/TableGen.cpp b/llvm/utils/TableGen/Basic/TableGen.cpp index b79ae93dab4f7..a655cbbc16096 100644 --- a/llvm/utils/TableGen/Basic/TableGen.cpp +++ b/llvm/utils/TableGen/Basic/TableGen.cpp @@ -73,7 +73,7 @@ int tblgen_main(int argc, char **argv) { InitLLVM X(argc, argv); cl::ParseCommandLineOptions(argc, argv); - std::function MainFn = nullptr; + MultiFileTableGenMainFn MainFn = nullptr; return TableGenMain(argv[0], MainFn); } diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp index ef7b13e8940f8..3486a7a7fb08c 100644 --- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp +++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp @@ -1878,6 +1878,8 @@ TableGenOutputFiles RegisterInfoEmitter::run(StringRef FilenamePrefix) { if (RegisterInfoDebug) debugDump(errs()); + // The suffixes should be in sync with the tablegen function in + // llvm/cmake/modules/TableGen.cmake. 
return {Main, {{"Enums.inc", Enums}, {"MCDesc.inc", MCDesc}, diff --git a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td index 14b00b04ccc18..420e58192b8fd 100644 --- a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td @@ -166,4 +166,27 @@ def TosaAttachTarget : Pass<"tosa-attach-target", "ModuleOp"> { ]; } +def TosaNarrowI64ToI32Pass : Pass<"tosa-narrow-i64-to-i32", "func::FuncOp"> { + let summary = "Narrow I64 TOSA operations to I32"; + let description = [{ + This pass narrows TOSA operations with 64-bit integer tensor types to + 32-bit integer tensor types. This can be useful for backends that do not + support the EXT-INT64 extension of TOSA. + }]; + + let options = [ + Option<"aggressiveRewrite", "aggressive-rewrite", "bool", "false", + "If enabled, all TOSA operations are rewritten, regardless or whether the narrowing" + "is safe. This option may lead to data loss if not used carefully.">, + Option<"convertFunctionBoundaries", "convert-function-boundaries", "bool", "false", + "If enabled, the pass will convert function I/O types as well. Otherwise casts will" + "be inserted at the I/O boundaries."> + ]; + + let dependentDialects = [ + "func::FuncDialect", + "tosa::TosaDialect", + ]; +} + #endif // MLIR_DIALECT_TOSA_TRANSFORMS_PASSES diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index edc6565f44f00..b9a5e7d7f6eac 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -1738,15 +1738,11 @@ LogicalResult ScaledExtPacked816OpLowering::matchAndRewrite( auto sourceType = cast(op.getSource().getType()); auto srcElemType = cast(sourceType.getElementType()); unsigned bitWidth = srcElemType.getWidth(); - int32_t scaleSel = - getScaleSel(blockSize, bitWidth, firstScaleLane, firstScaleByte); auto targetType = cast(op.getResult().getType()); auto destElemType = cast(targetType.getElementType()); - IntegerType i32 = rewriter.getI32Type(); - Value castedScale = - LLVM::BitcastOp::create(rewriter, loc, i32, adaptor.getScale()); + IntegerType i32 = rewriter.getI32Type(); Value source = adaptor.getSource(); Type llvmResultType = typeConverter->convertType(op.getResult().getType()); Type packedType = nullptr; @@ -1767,15 +1763,19 @@ LogicalResult ScaledExtPacked816OpLowering::matchAndRewrite( return rewriter.notifyMatchFailure(op, "type conversion failed"); } - Value castedSource = - LLVM::BitcastOp::create(rewriter, loc, packedType, source); - std::optional maybeIntrinsic = scaledExtPacked816ToIntrinsic(srcElemType, destElemType); if (!maybeIntrinsic.has_value()) return op.emitOpError( "no intrinsic matching packed scaled conversion on the given chipset"); + int32_t scaleSel = + getScaleSel(blockSize, bitWidth, firstScaleLane, firstScaleByte); + Value castedScale = + LLVM::BitcastOp::create(rewriter, loc, i32, adaptor.getScale()); + Value castedSource = + LLVM::BitcastOp::create(rewriter, loc, packedType, source); + OperationState loweredOp(loc, *maybeIntrinsic); loweredOp.addTypes({llvmResultType}); loweredOp.addOperands({castedSource, castedScale}); diff --git a/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt index 41b338d6e7189..76e9ddd5b2304 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt @@ -12,6 +12,7 @@ 
add_mlir_dialect_library(MLIRTosaTransforms TosaTypeConverters.cpp TosaProfileCompliance.cpp TosaValidation.cpp + TosaNarrowI64ToI32.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tosa/Transforms @@ -21,6 +22,7 @@ add_mlir_dialect_library(MLIRTosaTransforms LINK_LIBS PUBLIC MLIRFuncDialect + MLIRFuncTransformOps MLIRPass MLIRTosaDialect MLIRTransformUtils diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaNarrowI64ToI32.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaNarrowI64ToI32.cpp new file mode 100644 index 0000000000000..ddaf7d8a5e033 --- /dev/null +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaNarrowI64ToI32.cpp @@ -0,0 +1,310 @@ +//===- TosaNarrowI64ToI32.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass narrows TOSA operations with 64-bit integer tensor types to +// 32-bit integer tensor types. This can be useful for backends that do not +// support the EXT-INT64 extension of TOSA. The pass has two options: +// +// - aggressive-rewrite - If enabled, all TOSA operations are rewritten, +// regardless or whether the narrowing is safe. This option may lead to +// data loss if not used carefully. +// - convert-function-boundaries - If enabled, the pass will convert function +// I/O types as well. Otherwise casts will be inserted at the I/O +// boundaries. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Tosa/Transforms/Passes.h" + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Func/Transforms/FuncConversions.h" +#include "mlir/IR/Verifier.h" +#include "mlir/Pass/Pass.h" + +namespace mlir { +namespace tosa { +#define GEN_PASS_DEF_TOSANARROWI64TOI32PASS +#include "mlir/Dialect/Tosa/Transforms/Passes.h.inc" +} // namespace tosa +} // namespace mlir + +using namespace mlir; +using namespace mlir::tosa; + +namespace { + +LogicalResult convertGenericOp(Operation *op, ValueRange operands, + ConversionPatternRewriter &rewriter, + const TypeConverter *typeConverter) { + // Convert types of results + SmallVector newResults; + if (failed(typeConverter->convertTypes(op->getResultTypes(), newResults))) + return failure(); + + // Create a new operation state + OperationState state(op->getLoc(), op->getName().getStringRef(), operands, + newResults, {}, op->getSuccessors()); + + for (const NamedAttribute &namedAttribute : op->getAttrs()) { + const Attribute attribute = namedAttribute.getValue(); + + // Convert integer attribute type + if (const auto intAttr = dyn_cast(attribute)) { + const std::optional convertedAttribute = + typeConverter->convertTypeAttribute(intAttr.getType(), attribute); + state.addAttribute(namedAttribute.getName(), convertedAttribute.value()); + continue; + } + + if (const auto typeAttr = dyn_cast(attribute)) { + Type type = typeAttr.getValue(); + const std::optional convertedAttribute = + typeConverter->convertTypeAttribute(type, attribute); + if (!convertedAttribute) + return rewriter.notifyMatchFailure(op, + "Failed to convert type attribute."); + state.addAttribute(namedAttribute.getName(), convertedAttribute.value()); + continue; + } + + if (const auto denseElementsAttr = dyn_cast(attribute)) { + const Type type = denseElementsAttr.getType(); + const std::optional 
convertedAttribute = + typeConverter->convertTypeAttribute(type, denseElementsAttr); + if (!convertedAttribute) + return rewriter.notifyMatchFailure( + op, "Failed to convert dense elements attribute."); + state.addAttribute(namedAttribute.getName(), convertedAttribute.value()); + continue; + } + + state.addAttribute(namedAttribute.getName(), attribute); + } + + for (Region ®ion : op->getRegions()) { + Region *newRegion = state.addRegion(); + rewriter.inlineRegionBefore(region, *newRegion, newRegion->begin()); + if (failed(rewriter.convertRegionTypes(newRegion, *typeConverter))) + return failure(); + } + + Operation *newOp = rewriter.create(state); + rewriter.replaceOp(op, newOp->getResults()); + return success(); +} + +// =========================== +// Aggressive rewrite patterns +// =========================== + +class ConvertGenericOp : public ConversionPattern { +public: + ConvertGenericOp(TypeConverter &typeConverter, MLIRContext *context) + : ConversionPattern(typeConverter, MatchAnyOpTypeTag{}, 0, context) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + if (!isa(op)) + return rewriter.notifyMatchFailure( + op, + "Support for operations other than TOSA has not been implemented."); + + return convertGenericOp(op, operands, rewriter, typeConverter); + } +}; + +// =============================== +// Bounds checked rewrite patterns +// =============================== + +class ConvertArgMaxOpWithBoundsChecking + : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(tosa::ArgMaxOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + // Output type can be narrowed based on the size of the axis dimension + const int32_t axis = op.getAxis(); + const auto inputType = dyn_cast(adaptor.getInput().getType()); + if (!inputType || !inputType.isStaticDim(axis)) + return rewriter.notifyMatchFailure( + op, "Requires a static axis dimension for bounds checking."); + const int64_t axisDim = inputType.getDimSize(axis); + if (axisDim >= std::numeric_limits::max()) + return rewriter.notifyMatchFailure( + op, "Axis dimension is too large to narrow safely."); + + const Type resultType = op.getOutput().getType(); + const Type newResultType = typeConverter->convertType(resultType); + rewriter.replaceOpWithNewOp(op, newResultType, + adaptor.getInput(), axis); + return success(); + } +}; + +class ConvertCastOpWithBoundsChecking + : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(tosa::CastOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + const auto inputType = dyn_cast(adaptor.getInput().getType()); + const auto resultType = dyn_cast(op.getResult().getType()); + if (!inputType || !resultType) + return failure(); + + const auto elementInputIntType = + dyn_cast(inputType.getElementType()); + const auto elementResultIntType = + dyn_cast(resultType.getElementType()); + if (elementInputIntType && elementResultIntType && + elementInputIntType.getWidth() > elementResultIntType.getWidth()) + return rewriter.notifyMatchFailure( + op, "Narrowing cast may lead to data loss."); + + rewriter.replaceOpWithNewOp( + op, typeConverter->convertType(resultType), adaptor.getInput()); + return success(); + } +}; + +template +class ConvertTypedOp : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(OpTy op, 
typename OpTy::Adaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + return convertGenericOp(op, adaptor.getOperands(), rewriter, + this->getTypeConverter()); + } +}; + +struct TosaNarrowI64ToI32 + : public tosa::impl::TosaNarrowI64ToI32PassBase { +public: + explicit TosaNarrowI64ToI32() = default; + explicit TosaNarrowI64ToI32(const TosaNarrowI64ToI32PassOptions &options) + : TosaNarrowI64ToI32() { + this->aggressiveRewrite = options.aggressiveRewrite; + this->convertFunctionBoundaries = options.convertFunctionBoundaries; + } + + void runOnOperation() override { + MLIRContext *context = &getContext(); + + TypeConverter typeConverter; + typeConverter.addConversion([](Type type) -> Type { return type; }); + typeConverter.addConversion([](IntegerType type) -> Type { + if (!type.isInteger(64)) + return type; + return IntegerType::get(type.getContext(), 32); + }); + typeConverter.addConversion( + [&typeConverter](RankedTensorType type) -> Type { + const Type elementType = type.getElementType(); + if (!elementType.isInteger(64)) + return type; + return RankedTensorType::get(type.getShape(), + typeConverter.convertType(elementType)); + }); + + const auto materializeCast = [](OpBuilder &builder, Type resultType, + ValueRange inputs, Location loc) -> Value { + if (inputs.size() != 1) + return Value(); + return tosa::CastOp::create(builder, loc, resultType, inputs.front()); + }; + typeConverter.addSourceMaterialization(materializeCast); + typeConverter.addTargetMaterialization(materializeCast); + + typeConverter.addTypeAttributeConversion( + [](IntegerType type, IntegerAttr attribute) -> Attribute { + const APInt value = attribute.getValue().truncSSat(32); + return IntegerAttr::get(IntegerType::get(type.getContext(), 32), + value); + }); + typeConverter.addTypeAttributeConversion( + [&typeConverter](ShapedType type, + DenseIntElementsAttr attr) -> Attribute { + const ShapedType newType = + cast(typeConverter.convertType(type)); + const auto oldElementType = cast(type.getElementType()); + const auto newElementType = + cast(newType.getElementType()); + if (oldElementType.getWidth() == newElementType.getWidth()) + return attr; + + DenseElementsAttr mapped = + attr.mapValues(newElementType, [&](const APInt &v) { + return v.truncSSat(newElementType.getWidth()); + }); + return mapped; + }); + + ConversionTarget target(*context); + target.addDynamicallyLegalDialect( + [&typeConverter](Operation *op) { + return typeConverter.isLegal(op->getResultTypes()) && + typeConverter.isLegal(op->getOperandTypes()); + }); + if (convertFunctionBoundaries) { + target.addDynamicallyLegalOp( + [&typeConverter](func::FuncOp op) { + return typeConverter.isSignatureLegal(op.getFunctionType()) && + typeConverter.isLegal(&op.getBody()); + }); + target.addDynamicallyLegalOp([](func::ReturnOp op) { + const FunctionType funcType = + op->getParentOfType().getFunctionType(); + return llvm::equal(op.getOperandTypes(), funcType.getResults()); + }); + } else { + target.addDynamicallyLegalOp( + [](func::FuncOp op) { return true; }); + target.addDynamicallyLegalOp( + [](func::ReturnOp op) { return true; }); + } + + RewritePatternSet patterns(context); + if (convertFunctionBoundaries) { + populateFunctionOpInterfaceTypeConversionPattern( + patterns, typeConverter); + populateReturnOpTypeConversionPattern(patterns, typeConverter); + } + if (aggressiveRewrite) { + patterns.add(typeConverter, context); + } else { + // Tensor + patterns.add(typeConverter, context); + // Data layout + patterns.add>(typeConverter, context); + 
patterns.add>(typeConverter, context); + patterns.add>(typeConverter, context); + patterns.add>(typeConverter, context); + patterns.add>(typeConverter, context); + patterns.add>(typeConverter, context); + patterns.add>(typeConverter, context); + patterns.add>(typeConverter, context); + // Type conversion + patterns.add(typeConverter, context); + // Controlflow + patterns.add>(typeConverter, context); + patterns.add>(typeConverter, context); + } + + if (failed( + applyFullConversion(getOperation(), target, std::move(patterns)))) + signalPassFailure(); + } +}; + +} // namespace diff --git a/mlir/python/mlir/dialects/gpu/__init__.py b/mlir/python/mlir/dialects/gpu/__init__.py index 2fbcbb059f87a..d15643ca700e4 100644 --- a/mlir/python/mlir/dialects/gpu/__init__.py +++ b/mlir/python/mlir/dialects/gpu/__init__.py @@ -49,13 +49,13 @@ class GPUFuncOp(GPUFuncOp): FUNCTION_TYPE_ATTR_NAME = "function_type" SYM_NAME_ATTR_NAME = "sym_name" - ARGUMENT_ATTR_NAME = "arg_attrs" - RESULT_ATTR_NAME = "res_attrs" def __init__( self, function_type: Union[FunctionType, TypeAttr], sym_name: Optional[Union[str, StringAttr]] = None, + arg_attrs: Optional[Sequence[dict]] = None, + res_attrs: Optional[Sequence[dict]] = None, kernel: Optional[bool] = None, workgroup_attrib_attrs: Optional[Sequence[dict]] = None, private_attrib_attrs: Optional[Sequence[dict]] = None, @@ -88,6 +88,8 @@ def __init__( ) super().__init__( function_type, + arg_attrs=arg_attrs, + res_attrs=res_attrs, workgroup_attrib_attrs=workgroup_attrib_attrs, private_attrib_attrs=private_attrib_attrs, loc=loc, diff --git a/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32-aggressive.mlir b/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32-aggressive.mlir new file mode 100644 index 0000000000000..1a36177a37033 --- /dev/null +++ b/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32-aggressive.mlir @@ -0,0 +1,81 @@ +// RUN: mlir-opt -split-input-file -verify-diagnostics -tosa-narrow-i64-to-i32="aggressive-rewrite=1" %s | FileCheck %s --allow-unused-prefixes --check-prefixes=COMMON,DEFAULT +// RUN: mlir-opt -split-input-file -verify-diagnostics -tosa-narrow-i64-to-i32="aggressive-rewrite=1 convert-function-boundaries=1" %s | FileCheck %s --allow-unused-prefixes --check-prefixes=COMMON,FUNCBOUND + +// CHECK-LABEL: test_i64_argmax_large_axis_dim +func.func @test_i64_argmax_large_axis_dim(%arg0: tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi64> { + // DEFAULT: tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi32> + %0 = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi64> + return %0 : tensor<1x513x513xi64> +} + +// ----- + +// CHECK-LABEL: test_convert_input_parameters +// DEFAULT: %[[IN:.*]]: tensor<1x513x513x3xi64> +// FUNCBOUND: %[[IN:.*]]: tensor<1x513x513x3xi32> +func.func @test_convert_input_parameters(%arg0: tensor<1x513x513x3xi64>) -> tensor<1x513x513x3xf32> { + // DEFAULT: %[[FUNC_BOUND_CAST:.*]] = tosa.cast %[[IN]] : (tensor<1x513x513x3xi64>) -> tensor<1x513x513x3xi32> + // DEFAULT: %[[CAST1:.*]] = tosa.cast %[[FUNC_BOUND_CAST]] : (tensor<1x513x513x3xi32>) -> tensor<1x513x513x3xi32> + // FUNCBOUND: %[[CAST1:.*]] = tosa.cast %[[IN]] : (tensor<1x513x513x3xi32>) -> tensor<1x513x513x3xi32> + %0 = tosa.cast %arg0 : (tensor<1x513x513x3xi64>) -> tensor<1x513x513x3xi32> + + // COMMON: %[[CAST2:.*]] = tosa.cast %[[CAST1]] : (tensor<1x513x513x3xi32>) -> tensor<1x513x513x3xf32> + %1 = tosa.cast %0 : (tensor<1x513x513x3xi32>) -> tensor<1x513x513x3xf32> + return %1 : 
tensor<1x513x513x3xf32>
+}
+
+// -----
+
+// CHECK-LABEL: test_add
+// DEFAULT: %[[IN0:.*]]: tensor<13x21x1xi64>, %[[IN1:.*]]: tensor<13x21x3xi64>
+// FUNCBOUND: %[[IN0:.*]]: tensor<13x21x1xi32>, %[[IN1:.*]]: tensor<13x21x3xi32>
+func.func @test_add(%arg0: tensor<13x21x1xi64>, %arg1: tensor<13x21x3xi64>) -> tensor<13x21x3xi64> {
+  // DEFAULT-DAG: %[[FUNC_BOUND_CAST0:.*]] = tosa.cast %[[IN0]] : (tensor<13x21x1xi64>) -> tensor<13x21x1xi32>
+  // DEFAULT-DAG: %[[FUNC_BOUND_CAST1:.*]] = tosa.cast %[[IN1]] : (tensor<13x21x3xi64>) -> tensor<13x21x3xi32>
+  // DEFAULT: %[[ADD:.*]] = tosa.add %[[FUNC_BOUND_CAST0]], %[[FUNC_BOUND_CAST1]] : (tensor<13x21x1xi32>, tensor<13x21x3xi32>) -> tensor<13x21x3xi32>
+  // DEFAULT: %[[CAST:.*]] = tosa.cast %[[ADD]] : (tensor<13x21x3xi32>) -> tensor<13x21x3xi64>
+  // DEFAULT: return %[[CAST]] : tensor<13x21x3xi64>
+  // FUNCBOUND: %[[ADD:.*]] = tosa.add %[[IN0]], %[[IN1]] : (tensor<13x21x1xi32>, tensor<13x21x3xi32>) -> tensor<13x21x3xi32>
+  // FUNCBOUND: return %[[ADD]] : tensor<13x21x3xi32>
+  %0 = tosa.add %arg0, %arg1 : (tensor<13x21x1xi64>, tensor<13x21x3xi64>) -> tensor<13x21x3xi64>
+  return %0 : tensor<13x21x3xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_regions
+// DEFAULT: %[[IN0:.*]]: tensor<i64>, %[[IN1:.*]]: tensor<i64>
+func.func @test_regions(%arg0: tensor<i64>, %arg1: tensor<i64>, %arg2: tensor<i1>) -> tensor<i64> {
+  // DEFAULT-DAG: %[[CAST0:.*]] = tosa.cast %[[IN0]] : (tensor<i64>) -> tensor<i32>
+  // DEFAULT-DAG: %[[CAST1:.*]] = tosa.cast %[[IN1]] : (tensor<i64>) -> tensor<i32>
+  // COMMON: %[[IF_RESULT:.*]] = tosa.cond_if
+  %0 = tosa.cond_if %arg2 : tensor<i1> -> (tensor<i64>) {
+    // DEFAULT: %[[ADD:.*]] = tosa.add %[[CAST0]], %[[CAST1]] : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    // FUNCBOUND: %[[ADD:.*]] = tosa.add %[[IN0]], %[[IN1]] : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    %1 = tosa.add %arg0, %arg1 : (tensor<i64>, tensor<i64>) -> tensor<i64>
+    // COMMON: tosa.yield %[[ADD]] : tensor<i32>
+    tosa.yield %1 : tensor<i64>
+  } else {
+    // DEFAULT: %[[SUB:.*]] = tosa.sub %[[CAST0]], %[[CAST1]] : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    // FUNCBOUND: %[[SUB:.*]] = tosa.sub %[[IN0]], %[[IN1]] : (tensor<i32>, tensor<i32>) -> tensor<i32>
+    %1 = tosa.sub %arg0, %arg1 : (tensor<i64>, tensor<i64>) -> tensor<i64>
+    // COMMON: tosa.yield %[[SUB]] : tensor<i32>
+    tosa.yield %1 : tensor<i64>
+  }
+  // DEFAULT: %[[OUT:.*]] = tosa.cast %[[IF_RESULT]] : (tensor<i32>) -> tensor<i64>
+  // DEFAULT: return %[[OUT]] : tensor<i64>
+  // FUNCBOUND: return %[[IF_RESULT]] : tensor<i32>
+  return %0 : tensor<i64>
+}
+
+// -----
+
+// CHECK-LABEL: test_const
+func.func @test_const() -> tensor<2xi64> {
+  // COMMON: %[[CONST:.*]] = "tosa.const"() <{values = dense<[1, 2]> : tensor<2xi32>}> : () -> tensor<2xi32>
+  %0 = "tosa.const"() <{values = dense<[1, 2]> : tensor<2xi64>}> : () -> tensor<2xi64>
+  // DEFAULT: %[[OUT:.*]] = tosa.cast %[[CONST]] : (tensor<2xi32>) -> tensor<2xi64>
+  // DEFAULT: return %[[OUT]] : tensor<2xi64>
+  // FUNCBOUND: return %[[CONST]] : tensor<2xi32>
+  return %0 : tensor<2xi64>
+}
diff --git a/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32.mlir b/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32.mlir
new file mode 100644
index 0000000000000..a14483fcdd7b0
--- /dev/null
+++ b/mlir/test/Dialect/Tosa/tosa-narrow-i64-to-i32.mlir
@@ -0,0 +1,162 @@
+// RUN: mlir-opt -split-input-file -verify-diagnostics -tosa-narrow-i64-to-i32="convert-function-boundaries=0" %s | FileCheck %s --allow-unused-prefixes --check-prefixes=COMMON,DEFAULT
+// RUN: mlir-opt -split-input-file -verify-diagnostics -tosa-narrow-i64-to-i32="convert-function-boundaries=1" %s | FileCheck %s --allow-unused-prefixes --check-prefixes=COMMON,FUNCBOUND
+
+// -----
+
+// CHECK-LABEL: test_i64_argmax
+func.func @test_i64_argmax(%arg0: tensor<1x513x513x19xi8>) -> tensor<1x513x513xi64> {
+  // COMMON: %[[ARGMAX:.*]] = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x19xi8>) -> tensor<1x513x513xi32>
+  %0 = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x19xi8>) -> tensor<1x513x513xi64>
+
+  // DEFAULT: %[[CAST:.*]] = tosa.cast %[[ARGMAX]] : (tensor<1x513x513xi32>) -> tensor<1x513x513xi64>
+  // FUNCBOUND: return %[[ARGMAX]] : tensor<1x513x513xi32>
+  return %0 : tensor<1x513x513xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_i64_argmax_cast
+func.func @test_i64_argmax_cast(%arg0: tensor<1x513x513x19xi8>) -> tensor<1x513x513xf32> {
+  // COMMON: %[[ARGMAX:.*]] = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x19xi8>) -> tensor<1x513x513xi32>
+  %0 = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x19xi8>) -> tensor<1x513x513xi64>
+  // COMMON: tosa.cast %[[ARGMAX]] : (tensor<1x513x513xi32>) -> tensor<1x513x513xf32>
+  %1 = tosa.cast %0 : (tensor<1x513x513xi64>) -> tensor<1x513x513xf32>
+  return %1 : tensor<1x513x513xf32>
+}
+
+// -----
+
+// CHECK-LABEL: test_i64_argmax_large_axis_dim
+func.func @test_i64_argmax_large_axis_dim(%arg0: tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi64> {
+  // expected-error @+1 {{failed to legalize operation 'tosa.argmax'}}
+  %0 = tosa.argmax %arg0 {axis = 3 : i32} : (tensor<1x513x513x2147483650xi8>) -> tensor<1x513x513xi64>
+  return %0 : tensor<1x513x513xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_add
+func.func @test_add(%arg0: tensor<13x21x1xi64>, %arg1: tensor<13x21x3xi64>) -> tensor<13x21x3xi64> {
+  // expected-error @+1 {{failed to legalize operation 'tosa.add'}}
+  %0 = tosa.add %arg0, %arg1 : (tensor<13x21x1xi64>, tensor<13x21x3xi64>) -> tensor<13x21x3xi64>
+  return %0 : tensor<13x21x3xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_regions
+func.func @test_regions(%arg0: tensor<1x2xi32>, %arg1: tensor<1xi32>, %arg2: tensor<i1>) -> tensor<1xi32> {
+  // COMMON: %[[IF_RESULT:.*]] = tosa.cond_if %arg2 : tensor<i1> -> tensor<1xi32>
+  %0 = tosa.cond_if %arg2 : tensor<i1> -> tensor<1xi32> {
+    // COMMON: %[[ARGMAX:.*]] = tosa.argmax %arg0 {axis = 1 : i32} : (tensor<1x2xi32>) -> tensor<1xi32>
+    %1 = tosa.argmax %arg0 {axis = 1 : i32} : (tensor<1x2xi32>) -> tensor<1xi64>
+    // COMMON: %[[CAST:.*]] = tosa.cast %[[ARGMAX]] : (tensor<1xi32>) -> tensor<1xi32>
+    %2 = tosa.cast %1 : (tensor<1xi64>) -> tensor<1xi32>
+    // COMMON: tosa.yield %[[CAST]] : tensor<1xi32>
+    tosa.yield %2 : tensor<1xi32>
+  } else {
+    tosa.yield %arg1 : tensor<1xi32>
+  }
+  // COMMON: return %[[IF_RESULT]] : tensor<1xi32>
+  return %0 : tensor<1xi32>
+}
+
+// -----
+
+// CHECK-LABEL: test_concat
+func.func @test_concat(%arg0: tensor<13x21x3xi64>, %arg1: tensor<13x21x3xi64>) -> tensor<26x21x3xi64> {
+  // COMMON: tosa.concat %{{.*}}, %{{.*}} {axis = 0 : i32} : (tensor<13x21x3xi32>, tensor<13x21x3xi32>) -> tensor<26x21x3xi32>
+  %0 = tosa.concat %arg0, %arg1 {axis = 0 : i32} : (tensor<13x21x3xi64>, tensor<13x21x3xi64>) -> tensor<26x21x3xi64>
+  return %0 : tensor<26x21x3xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_pad
+func.func @test_pad(%arg0: tensor<13x21x3xi64>, %arg1: tensor<1xi64>) -> tensor<15x23x5xi64> {
+  %padding = tosa.const_shape {values = dense<1> : tensor<6xindex>} : () -> !tosa.shape<6>
+  // COMMON: tosa.pad %{{.*}}, %{{.*}}, %{{.*}} : (tensor<13x21x3xi32>, !tosa.shape<6>, tensor<1xi32>) -> tensor<15x23x5xi32>
+  %1 = tosa.pad %arg0, %padding, %arg1 : (tensor<13x21x3xi64>, !tosa.shape<6>, tensor<1xi64>) -> tensor<15x23x5xi64>
+  return %1 : tensor<15x23x5xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_reshape
+func.func @test_reshape(%arg0: tensor<13x21x3xi64>) -> tensor<1x819xi64> {
+  %1 = tosa.const_shape {values = dense<[1, 819]> : tensor<2xindex>} : () -> !tosa.shape<2>
+  // COMMON: tosa.reshape %{{.*}}, %{{.*}} : (tensor<13x21x3xi32>, !tosa.shape<2>) -> tensor<1x819xi32>
+  %0 = tosa.reshape %arg0, %1 : (tensor<13x21x3xi64>, !tosa.shape<2>) -> tensor<1x819xi64>
+  return %0 : tensor<1x819xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_reverse
+func.func @test_reverse(%arg0: tensor<13x21x3xi64>) -> tensor<13x21x3xi64> {
+  // COMMON: tosa.reverse %{{.*}} {axis = 0 : i32} : (tensor<13x21x3xi32>) -> tensor<13x21x3xi32>
+  %0 = tosa.reverse %arg0 {axis = 0 : i32} : (tensor<13x21x3xi64>) -> tensor<13x21x3xi64>
+  return %0 : tensor<13x21x3xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_slice
+func.func @test_slice(%arg0: tensor<13x21x3xi64>) -> tensor<4x11x1xi64> {
+  %0 = tosa.const_shape {values = dense<[4, 11, 1]> : tensor<3xindex>} : () -> !tosa.shape<3>
+  %1 = tosa.const_shape {values = dense<[6, 8, 0]> : tensor<3xindex>} : () -> !tosa.shape<3>
+  // COMMON: tosa.slice %{{.*}}, %{{.*}}, %{{.*}} : (tensor<13x21x3xi32>, !tosa.shape<3>, !tosa.shape<3>) -> tensor<4x11x1xi32>
+  %2 = tosa.slice %arg0, %0, %1 : (tensor<13x21x3xi64>, !tosa.shape<3>, !tosa.shape<3>) -> tensor<4x11x1xi64>
+  return %2 : tensor<4x11x1xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_tile
+func.func @test_tile(%arg0: tensor<13x21x3xi64>) -> tensor<39x21x6xi64> {
+  %cst = tosa.const_shape { values = dense<[3, 1, 2]> : tensor<3xindex> } : () -> !tosa.shape<3>
+  // COMMON: tosa.tile %{{.*}}, %{{.*}} : (tensor<13x21x3xi32>, !tosa.shape<3>) -> tensor<39x21x6xi32>
+  %0 = tosa.tile %arg0, %cst: (tensor<13x21x3xi64>, !tosa.shape<3>) -> tensor<39x21x6xi64>
+  return %0 : tensor<39x21x6xi64>
+}
+
+// -----
+
+// CHECK-LABEL: transpose
+func.func @test_transpose(%arg0: tensor<13x21x3xi64>) -> tensor<3x13x21xi64> {
+  // COMMON: tosa.transpose %{{.*}} {perms = array<i32: 2, 0, 1>} : (tensor<13x21x3xi32>) -> tensor<3x13x21xi32>
+  %1 = tosa.transpose %arg0 {perms = array<i32: 2, 0, 1>} : (tensor<13x21x3xi64>) -> tensor<3x13x21xi64>
+  return %1 : tensor<3x13x21xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_transition_to_i64
+func.func @test_transition_to_i64(%arg0: tensor<1xi32>) -> tensor<1xi64> {
+  // COMMON: %[[CAST:.*]] = tosa.cast %arg0 : (tensor<1xi32>) -> tensor<1xi32>
+  %0 = tosa.cast %arg0 : (tensor<1xi32>) -> tensor<1xi64>
+  // COMMON: %[[IDENTITY1:.*]] = tosa.identity %[[CAST]] : (tensor<1xi32>) -> tensor<1xi32>
+  %1 = tosa.identity %0 : (tensor<1xi64>) -> tensor<1xi64>
+  // COMMON: %[[IDENTITY2:.*]] = tosa.identity %[[IDENTITY1]] : (tensor<1xi32>) -> tensor<1xi32>
+  %2 = tosa.identity %1 : (tensor<1xi64>) -> tensor<1xi64>
+  // DEFAULT: %[[OUT_CAST:.*]] = tosa.cast %[[IDENTITY2]] : (tensor<1xi32>) -> tensor<1xi64>
+  // DEFAULT: return %[[OUT_CAST]] : tensor<1xi64>
+  // FUNCBOUND: return %[[IDENTITY2]] : tensor<1xi32>
+  return %2 : tensor<1xi64>
+}
+
+// -----
+
+// CHECK-LABEL: test_transition_from_i64
+func.func @test_transition_from_i64(%arg0: tensor<1xi64>) -> tensor<1xi32> {
+  // DEFAULT: %[[CAST:.*]] = tosa.cast %arg0 : (tensor<1xi64>) -> tensor<1xi32>
+  // DEFAULT: %[[IDENTITY1:.*]] = tosa.identity %[[CAST]] : (tensor<1xi32>) -> tensor<1xi32>
+  // FUNCBOUND: %[[IDENTITY1:.*]] = tosa.identity %arg0 : (tensor<1xi32>) -> tensor<1xi32>
+  %0 = tosa.identity %arg0 : (tensor<1xi64>) -> tensor<1xi64>
+  // COMMON: %[[IDENTITY2:.*]] = tosa.identity %[[IDENTITY1]] : (tensor<1xi32>) -> tensor<1xi32>
+  %1 = tosa.identity %0 :
(tensor<1xi64>) -> tensor<1xi64> + // COMMON: %[[OUT_CAST:.*]] = tosa.cast %[[IDENTITY2]] : (tensor<1xi32>) -> tensor<1xi32> + %2 = tosa.cast %1 : (tensor<1xi64>) -> tensor<1xi32> + // COMMON: return %[[OUT_CAST]] : tensor<1xi32> + return %2 : tensor<1xi32> +} diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py index 3945c99c41091..1a009b7dfa30d 100644 --- a/mlir/test/python/dialects/gpu/dialect.py +++ b/mlir/test/python/dialects/gpu/dialect.py @@ -133,9 +133,10 @@ def builder(func: gpu.GPUFuncOp) -> None: ), func.known_grid_size func = gpu.GPUFuncOp( - func_type, + ir.FunctionType.get(inputs=[T.index()], results=[]), sym_name="non_kernel_func", body_builder=builder, + arg_attrs=[{"gpu.some_attribute": ir.StringAttr.get("foo")}], ) assert not func.is_kernel assert func.known_block_size is None @@ -154,10 +155,11 @@ def builder(func: gpu.GPUFuncOp) -> None: # CHECK: %[[VAL_0:.*]] = gpu.global_id x # CHECK: gpu.return # CHECK: } - # CHECK: gpu.func @non_kernel_func() { - # CHECK: %[[VAL_0:.*]] = gpu.global_id x - # CHECK: gpu.return - # CHECK: } + # CHECK: gpu.func @non_kernel_func( + # CHECK-SAME: %[[ARG0:.*]]: index {gpu.some_attribute = "foo"}) { + # CHECK: %[[GLOBAL_ID_0:.*]] = gpu.global_id x + # CHECK: gpu.return + # CHECK: } # CHECK-LABEL: testGPULaunchFuncOp diff --git a/utils/bazel/MODULE.bazel b/utils/bazel/MODULE.bazel new file mode 100644 index 0000000000000..d061487acf4d7 --- /dev/null +++ b/utils/bazel/MODULE.bazel @@ -0,0 +1,38 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +"""bzlmod configuration for llvm-project""" +module(name = "llvm-project-overlay") + +bazel_dep(name = "apple_support", version = "1.24.1", repo_name = "build_bazel_apple_support") +bazel_dep(name = "bazel_skylib", version = "1.8.2") +bazel_dep(name = "platforms", version = "1.0.0") +bazel_dep(name = "rules_android", version = "0.6.6") +bazel_dep(name = "rules_cc", version = "0.2.11") +bazel_dep(name = "rules_foreign_cc", version = "0.15.1") +bazel_dep(name = "rules_python", version = "1.6.3") +bazel_dep(name = "rules_shell", version = "0.6.1") + +llvm_repos_extension = use_extension(":extensions.bzl", "llvm_repos_extension") + +use_repo( + llvm_repos_extension, + "llvm-raw", + "llvm_zlib", + "vulkan_headers", + "vulkan_sdk_setup", + "gmp", + "mpfr", + "mpc", + "pfm", + "llvm_zstd", + "pybind11", + "pyyaml", + "robin_map", + "nanobind", +) + +llvm_configure = use_repo_rule("@llvm-raw//utils/bazel:configure.bzl", "llvm_configure") + +llvm_configure(name = "llvm-project") diff --git a/utils/bazel/MODULE.bazel.lock b/utils/bazel/MODULE.bazel.lock new file mode 100644 index 0000000000000..64de258401e91 --- /dev/null +++ b/utils/bazel/MODULE.bazel.lock @@ -0,0 +1,490 @@ +{ + "lockFileVersion": 16, + "registryFileHashes": { + "https://bcr.bazel.build/bazel_registry.json": "8a28e4aff06ee60aed2a8c281907fb8bcbf3b753c91fb5a5c57da3215d5b3497", + "https://bcr.bazel.build/modules/abseil-cpp/20210324.2/MODULE.bazel": "7cd0312e064fde87c8d1cd79ba06c876bd23630c83466e9500321be55c96ace2", + "https://bcr.bazel.build/modules/abseil-cpp/20211102.0/MODULE.bazel": "70390338f7a5106231d20620712f7cccb659cd0e9d073d1991c038eb9fc57589", + "https://bcr.bazel.build/modules/abseil-cpp/20230125.1/MODULE.bazel": "89047429cb0207707b2dface14ba7f8df85273d484c2572755be4bab7ce9c3a0", + 
"https://bcr.bazel.build/modules/abseil-cpp/20230802.0.bcr.1/MODULE.bazel": "1c8cec495288dccd14fdae6e3f95f772c1c91857047a098fad772034264cc8cb", + "https://bcr.bazel.build/modules/abseil-cpp/20230802.0/MODULE.bazel": "d253ae36a8bd9ee3c5955384096ccb6baf16a1b1e93e858370da0a3b94f77c16", + "https://bcr.bazel.build/modules/abseil-cpp/20230802.1/MODULE.bazel": "fa92e2eb41a04df73cdabeec37107316f7e5272650f81d6cc096418fe647b915", + "https://bcr.bazel.build/modules/abseil-cpp/20240116.1/MODULE.bazel": "37bcdb4440fbb61df6a1c296ae01b327f19e9bb521f9b8e26ec854b6f97309ed", + "https://bcr.bazel.build/modules/abseil-cpp/20240116.2/MODULE.bazel": "73939767a4686cd9a520d16af5ab440071ed75cec1a876bf2fcfaf1f71987a16", + "https://bcr.bazel.build/modules/abseil-cpp/20250127.0/MODULE.bazel": "d1086e248cda6576862b4b3fe9ad76a214e08c189af5b42557a6e1888812c5d5", + "https://bcr.bazel.build/modules/abseil-cpp/20250127.0/source.json": "1b996859f840d8efc7c720efc61dcf2a84b1261cb3974cbbe9b6666ebf567775", + "https://bcr.bazel.build/modules/abseil-py/2.1.0/MODULE.bazel": "5ebe5bf853769c65707e5c28f216798f7a4b1042015e6a36e6d03094d94bec8a", + "https://bcr.bazel.build/modules/abseil-py/2.1.0/source.json": "0e8fc4f088ce07099c1cd6594c20c7ddbb48b4b3c0849b7d94ba94be88ff042b", + "https://bcr.bazel.build/modules/apple_support/1.11.1/MODULE.bazel": "1843d7cd8a58369a444fc6000e7304425fba600ff641592161d9f15b179fb896", + "https://bcr.bazel.build/modules/apple_support/1.15.1/MODULE.bazel": "a0556fefca0b1bb2de8567b8827518f94db6a6e7e7d632b4c48dc5f865bc7c85", + "https://bcr.bazel.build/modules/apple_support/1.24.1/MODULE.bazel": "f46e8ddad60aef170ee92b2f3d00ef66c147ceafea68b6877cb45bd91737f5f8", + "https://bcr.bazel.build/modules/apple_support/1.24.1/source.json": "cf725267cbacc5f028ef13bb77e7f2c2e0066923a4dab1025e4a0511b1ed258a", + "https://bcr.bazel.build/modules/bazel_features/1.1.0/MODULE.bazel": "cfd42ff3b815a5f39554d97182657f8c4b9719568eb7fded2b9135f084bf760b", + "https://bcr.bazel.build/modules/bazel_features/1.1.1/MODULE.bazel": "27b8c79ef57efe08efccbd9dd6ef70d61b4798320b8d3c134fd571f78963dbcd", + "https://bcr.bazel.build/modules/bazel_features/1.11.0/MODULE.bazel": "f9382337dd5a474c3b7d334c2f83e50b6eaedc284253334cf823044a26de03e8", + "https://bcr.bazel.build/modules/bazel_features/1.13.0/MODULE.bazel": "c14c33c7c3c730612bdbe14ebbb5e61936b6f11322ea95a6e91cd1ba962f94df", + "https://bcr.bazel.build/modules/bazel_features/1.15.0/MODULE.bazel": "d38ff6e517149dc509406aca0db3ad1efdd890a85e049585b7234d04238e2a4d", + "https://bcr.bazel.build/modules/bazel_features/1.17.0/MODULE.bazel": "039de32d21b816b47bd42c778e0454217e9c9caac4a3cf8e15c7231ee3ddee4d", + "https://bcr.bazel.build/modules/bazel_features/1.18.0/MODULE.bazel": "1be0ae2557ab3a72a57aeb31b29be347bcdc5d2b1eb1e70f39e3851a7e97041a", + "https://bcr.bazel.build/modules/bazel_features/1.19.0/MODULE.bazel": "59adcdf28230d220f0067b1f435b8537dd033bfff8db21335ef9217919c7fb58", + "https://bcr.bazel.build/modules/bazel_features/1.21.0/MODULE.bazel": "675642261665d8eea09989aa3b8afb5c37627f1be178382c320d1b46afba5e3b", + "https://bcr.bazel.build/modules/bazel_features/1.23.0/MODULE.bazel": "fd1ac84bc4e97a5a0816b7fd7d4d4f6d837b0047cf4cbd81652d616af3a6591a", + "https://bcr.bazel.build/modules/bazel_features/1.27.0/MODULE.bazel": "621eeee06c4458a9121d1f104efb80f39d34deff4984e778359c60eaf1a8cb65", + "https://bcr.bazel.build/modules/bazel_features/1.28.0/MODULE.bazel": "4b4200e6cbf8fa335b2c3f43e1d6ef3e240319c33d43d60cc0fbd4b87ece299d", + "https://bcr.bazel.build/modules/bazel_features/1.3.0/MODULE.bazel": 
"cdcafe83ec318cda34e02948e81d790aab8df7a929cec6f6969f13a489ccecd9", + "https://bcr.bazel.build/modules/bazel_features/1.30.0/MODULE.bazel": "a14b62d05969a293b80257e72e597c2da7f717e1e69fa8b339703ed6731bec87", + "https://bcr.bazel.build/modules/bazel_features/1.30.0/source.json": "b07e17f067fe4f69f90b03b36ef1e08fe0d1f3cac254c1241a1818773e3423bc", + "https://bcr.bazel.build/modules/bazel_features/1.4.1/MODULE.bazel": "e45b6bb2350aff3e442ae1111c555e27eac1d915e77775f6fdc4b351b758b5d7", + "https://bcr.bazel.build/modules/bazel_features/1.9.1/MODULE.bazel": "8f679097876a9b609ad1f60249c49d68bfab783dd9be012faf9d82547b14815a", + "https://bcr.bazel.build/modules/bazel_skylib/1.0.3/MODULE.bazel": "bcb0fd896384802d1ad283b4e4eb4d718eebd8cb820b0a2c3a347fb971afd9d8", + "https://bcr.bazel.build/modules/bazel_skylib/1.1.1/MODULE.bazel": "1add3e7d93ff2e6998f9e118022c84d163917d912f5afafb3058e3d2f1545b5e", + "https://bcr.bazel.build/modules/bazel_skylib/1.2.0/MODULE.bazel": "44fe84260e454ed94ad326352a698422dbe372b21a1ac9f3eab76eb531223686", + "https://bcr.bazel.build/modules/bazel_skylib/1.2.1/MODULE.bazel": "f35baf9da0efe45fa3da1696ae906eea3d615ad41e2e3def4aeb4e8bc0ef9a7a", + "https://bcr.bazel.build/modules/bazel_skylib/1.3.0/MODULE.bazel": "20228b92868bf5cfc41bda7afc8a8ba2a543201851de39d990ec957b513579c5", + "https://bcr.bazel.build/modules/bazel_skylib/1.4.1/MODULE.bazel": "a0dcb779424be33100dcae821e9e27e4f2901d9dfd5333efe5ac6a8d7ab75e1d", + "https://bcr.bazel.build/modules/bazel_skylib/1.4.2/MODULE.bazel": "3bd40978e7a1fac911d5989e6b09d8f64921865a45822d8b09e815eaa726a651", + "https://bcr.bazel.build/modules/bazel_skylib/1.5.0/MODULE.bazel": "32880f5e2945ce6a03d1fbd588e9198c0a959bb42297b2cfaf1685b7bc32e138", + "https://bcr.bazel.build/modules/bazel_skylib/1.6.1/MODULE.bazel": "8fdee2dbaace6c252131c00e1de4b165dc65af02ea278476187765e1a617b917", + "https://bcr.bazel.build/modules/bazel_skylib/1.7.0/MODULE.bazel": "0db596f4563de7938de764cc8deeabec291f55e8ec15299718b93c4423e9796d", + "https://bcr.bazel.build/modules/bazel_skylib/1.7.1/MODULE.bazel": "3120d80c5861aa616222ec015332e5f8d3171e062e3e804a2a0253e1be26e59b", + "https://bcr.bazel.build/modules/bazel_skylib/1.8.1/MODULE.bazel": "88ade7293becda963e0e3ea33e7d54d3425127e0a326e0d17da085a5f1f03ff6", + "https://bcr.bazel.build/modules/bazel_skylib/1.8.2/MODULE.bazel": "69ad6927098316848b34a9142bcc975e018ba27f08c4ff403f50c1b6e646ca67", + "https://bcr.bazel.build/modules/bazel_skylib/1.8.2/source.json": "34a3c8bcf233b835eb74be9d628899bb32999d3e0eadef1947a0a562a2b16ffb", + "https://bcr.bazel.build/modules/bazel_worker_api/0.0.1/MODULE.bazel": "02a13b77321773b2042e70ee5e4c5e099c8ddee4cf2da9cd420442c36938d4bd", + "https://bcr.bazel.build/modules/bazel_worker_api/0.0.4/MODULE.bazel": "460aa12d01231a80cce03c548287b433b321d205b0028ae596728c35e5ee442e", + "https://bcr.bazel.build/modules/bazel_worker_api/0.0.4/source.json": "d353c410d47a8b65d09fa98e83d57ebec257a2c2b9c6e42d6fda1cb25e5464a5", + "https://bcr.bazel.build/modules/bazel_worker_java/0.0.4/MODULE.bazel": "82494a01018bb7ef06d4a17ec4cd7a758721f10eb8b6c820a818e70d669500db", + "https://bcr.bazel.build/modules/bazel_worker_java/0.0.4/source.json": "a2d30458fd86cf022c2b6331e652526fa08e17573b2f5034a9dbcacdf9c2583c", + "https://bcr.bazel.build/modules/buildozer/7.1.2/MODULE.bazel": "2e8dd40ede9c454042645fd8d8d0cd1527966aa5c919de86661e62953cd73d84", + "https://bcr.bazel.build/modules/buildozer/7.1.2/source.json": "c9028a501d2db85793a6996205c8de120944f50a0d570438fcae0457a5f9d1f8", + 
"https://bcr.bazel.build/modules/gazelle/0.32.0/MODULE.bazel": "b499f58a5d0d3537f3cf5b76d8ada18242f64ec474d8391247438bf04f58c7b8", + "https://bcr.bazel.build/modules/gazelle/0.33.0/MODULE.bazel": "a13a0f279b462b784fb8dd52a4074526c4a2afe70e114c7d09066097a46b3350", + "https://bcr.bazel.build/modules/gazelle/0.34.0/MODULE.bazel": "abdd8ce4d70978933209db92e436deb3a8b737859e9354fb5fd11fb5c2004c8a", + "https://bcr.bazel.build/modules/gazelle/0.36.0/MODULE.bazel": "e375d5d6e9a6ca59b0cb38b0540bc9a05b6aa926d322f2de268ad267a2ee74c0", + "https://bcr.bazel.build/modules/gazelle/0.40.0/MODULE.bazel": "42ba5378ebe845fca43989a53186ab436d956db498acde790685fe0e8f9c6146", + "https://bcr.bazel.build/modules/gazelle/0.40.0/source.json": "1e5ef6e4d8b9b6836d93273c781e78ff829ea2e077afef7a57298040fa4f010a", + "https://bcr.bazel.build/modules/google_benchmark/1.8.2/MODULE.bazel": "a70cf1bba851000ba93b58ae2f6d76490a9feb74192e57ab8e8ff13c34ec50cb", + "https://bcr.bazel.build/modules/googletest/1.11.0/MODULE.bazel": "3a83f095183f66345ca86aa13c58b59f9f94a2f81999c093d4eeaa2d262d12f4", + "https://bcr.bazel.build/modules/googletest/1.14.0.bcr.1/MODULE.bazel": "22c31a561553727960057361aa33bf20fb2e98584bc4fec007906e27053f80c6", + "https://bcr.bazel.build/modules/googletest/1.14.0/MODULE.bazel": "cfbcbf3e6eac06ef9d85900f64424708cc08687d1b527f0ef65aa7517af8118f", + "https://bcr.bazel.build/modules/googletest/1.15.2/MODULE.bazel": "6de1edc1d26cafb0ea1a6ab3f4d4192d91a312fd2d360b63adaa213cd00b2108", + "https://bcr.bazel.build/modules/googletest/1.15.2/source.json": "dbdda654dcb3a0d7a8bc5d0ac5fc7e150b58c2a986025ae5bc634bb2cb61f470", + "https://bcr.bazel.build/modules/jsoncpp/1.9.5/MODULE.bazel": "31271aedc59e815656f5736f282bb7509a97c7ecb43e927ac1a37966e0578075", + "https://bcr.bazel.build/modules/jsoncpp/1.9.6/MODULE.bazel": "2f8d20d3b7d54143213c4dfc3d98225c42de7d666011528dc8fe91591e2e17b0", + "https://bcr.bazel.build/modules/jsoncpp/1.9.6/source.json": "a04756d367a2126c3541682864ecec52f92cdee80a35735a3cb249ce015ca000", + "https://bcr.bazel.build/modules/libpfm/4.11.0/MODULE.bazel": "45061ff025b301940f1e30d2c16bea596c25b176c8b6b3087e92615adbd52902", + "https://bcr.bazel.build/modules/nlohmann_json/3.6.1/MODULE.bazel": "6f7b417dcc794d9add9e556673ad25cb3ba835224290f4f848f8e2db1e1fca74", + "https://bcr.bazel.build/modules/nlohmann_json/3.6.1/source.json": "f448c6e8963fdfa7eb831457df83ad63d3d6355018f6574fb017e8169deb43a9", + "https://bcr.bazel.build/modules/platforms/0.0.10/MODULE.bazel": "8cb8efaf200bdeb2150d93e162c40f388529a25852b332cec879373771e48ed5", + "https://bcr.bazel.build/modules/platforms/0.0.11/MODULE.bazel": "0daefc49732e227caa8bfa834d65dc52e8cc18a2faf80df25e8caea151a9413f", + "https://bcr.bazel.build/modules/platforms/0.0.4/MODULE.bazel": "9b328e31ee156f53f3c416a64f8491f7eb731742655a47c9eec4703a71644aee", + "https://bcr.bazel.build/modules/platforms/0.0.5/MODULE.bazel": "5733b54ea419d5eaf7997054bb55f6a1d0b5ff8aedf0176fef9eea44f3acda37", + "https://bcr.bazel.build/modules/platforms/0.0.6/MODULE.bazel": "ad6eeef431dc52aefd2d77ed20a4b353f8ebf0f4ecdd26a807d2da5aa8cd0615", + "https://bcr.bazel.build/modules/platforms/0.0.7/MODULE.bazel": "72fd4a0ede9ee5c021f6a8dd92b503e089f46c227ba2813ff183b71616034814", + "https://bcr.bazel.build/modules/platforms/0.0.8/MODULE.bazel": "9f142c03e348f6d263719f5074b21ef3adf0b139ee4c5133e2aa35664da9eb2d", + "https://bcr.bazel.build/modules/platforms/0.0.9/MODULE.bazel": "4a87a60c927b56ddd67db50c89acaa62f4ce2a1d2149ccb63ffd871d5ce29ebc", + 
"https://bcr.bazel.build/modules/platforms/1.0.0/MODULE.bazel": "f05feb42b48f1b3c225e4ccf351f367be0371411a803198ec34a389fb22aa580", + "https://bcr.bazel.build/modules/platforms/1.0.0/source.json": "f4ff1fd412e0246fd38c82328eb209130ead81d62dcd5a9e40910f867f733d96", + "https://bcr.bazel.build/modules/protobuf/21.7/MODULE.bazel": "a5a29bb89544f9b97edce05642fac225a808b5b7be74038ea3640fae2f8e66a7", + "https://bcr.bazel.build/modules/protobuf/23.1/MODULE.bazel": "88b393b3eb4101d18129e5db51847cd40a5517a53e81216144a8c32dfeeca52a", + "https://bcr.bazel.build/modules/protobuf/24.4/MODULE.bazel": "7bc7ce5f2abf36b3b7b7c8218d3acdebb9426aeb35c2257c96445756f970eb12", + "https://bcr.bazel.build/modules/protobuf/27.0/MODULE.bazel": "7873b60be88844a0a1d8f80b9d5d20cfbd8495a689b8763e76c6372998d3f64c", + "https://bcr.bazel.build/modules/protobuf/27.1/MODULE.bazel": "703a7b614728bb06647f965264967a8ef1c39e09e8f167b3ca0bb1fd80449c0d", + "https://bcr.bazel.build/modules/protobuf/27.2/MODULE.bazel": "32450b50673882e4c8c3d10a83f3bc82161b213ed2f80d17e38bece8f165c295", + "https://bcr.bazel.build/modules/protobuf/29.0-rc2/MODULE.bazel": "6241d35983510143049943fc0d57937937122baf1b287862f9dc8590fc4c37df", + "https://bcr.bazel.build/modules/protobuf/29.0-rc3/MODULE.bazel": "33c2dfa286578573afc55a7acaea3cada4122b9631007c594bf0729f41c8de92", + "https://bcr.bazel.build/modules/protobuf/29.0/MODULE.bazel": "319dc8bf4c679ff87e71b1ccfb5a6e90a6dbc4693501d471f48662ac46d04e4e", + "https://bcr.bazel.build/modules/protobuf/3.19.0/MODULE.bazel": "6b5fbb433f760a99a22b18b6850ed5784ef0e9928a72668b66e4d7ccd47db9b0", + "https://bcr.bazel.build/modules/protobuf/3.19.2/MODULE.bazel": "532ffe5f2186b69fdde039efe6df13ba726ff338c6bc82275ad433013fa10573", + "https://bcr.bazel.build/modules/protobuf/3.19.6/MODULE.bazel": "9233edc5e1f2ee276a60de3eaa47ac4132302ef9643238f23128fea53ea12858", + "https://bcr.bazel.build/modules/protobuf/31.1/MODULE.bazel": "379a389bb330b7b8c1cdf331cc90bf3e13de5614799b3b52cdb7c6f389f6b38e", + "https://bcr.bazel.build/modules/protobuf/31.1/source.json": "25af5d0219da0c0fc4d1191a24ce438e6ca7f49d2e1a94f354efeba6ef10426f", + "https://bcr.bazel.build/modules/pybind11_bazel/2.11.1/MODULE.bazel": "88af1c246226d87e65be78ed49ecd1e6f5e98648558c14ce99176da041dc378e", + "https://bcr.bazel.build/modules/pybind11_bazel/2.12.0/MODULE.bazel": "e6f4c20442eaa7c90d7190d8dc539d0ab422f95c65a57cc59562170c58ae3d34", + "https://bcr.bazel.build/modules/pybind11_bazel/2.12.0/source.json": "6900fdc8a9e95866b8c0d4ad4aba4d4236317b5c1cd04c502df3f0d33afed680", + "https://bcr.bazel.build/modules/re2/2023-09-01/MODULE.bazel": "cb3d511531b16cfc78a225a9e2136007a48cf8a677e4264baeab57fe78a80206", + "https://bcr.bazel.build/modules/re2/2024-07-02.bcr.1/MODULE.bazel": "b4963dda9b31080be1905ef085ecd7dd6cd47c05c79b9cdf83ade83ab2ab271a", + "https://bcr.bazel.build/modules/re2/2024-07-02.bcr.1/source.json": "2ff292be6ef3340325ce8a045ecc326e92cbfab47c7cbab4bd85d28971b97ac4", + "https://bcr.bazel.build/modules/re2/2024-07-02/MODULE.bazel": "0eadc4395959969297cbcf31a249ff457f2f1d456228c67719480205aa306daa", + "https://bcr.bazel.build/modules/rules_android/0.1.1/MODULE.bazel": "48809ab0091b07ad0182defb787c4c5328bd3a278938415c00a7b69b50c4d3a8", + "https://bcr.bazel.build/modules/rules_android/0.6.6/MODULE.bazel": "b0fb569752aab65ab1a9db0a8f6cfaf5aa1754965e17e95dcf0e4d88e192a68d", + "https://bcr.bazel.build/modules/rules_android/0.6.6/source.json": "a9d8dc2d5a102dc03269a94acc886a4cab82cdcb9ccbc77b0f665d6d17a6ae09", + 
"https://bcr.bazel.build/modules/rules_apple/3.16.0/MODULE.bazel": "0d1caf0b8375942ce98ea944be754a18874041e4e0459401d925577624d3a54a", + "https://bcr.bazel.build/modules/rules_apple/3.16.0/source.json": "d8b5fe461272018cc07cfafce11fe369c7525330804c37eec5a82f84cd475366", + "https://bcr.bazel.build/modules/rules_cc/0.0.1/MODULE.bazel": "cb2aa0747f84c6c3a78dad4e2049c154f08ab9d166b1273835a8174940365647", + "https://bcr.bazel.build/modules/rules_cc/0.0.10/MODULE.bazel": "ec1705118f7eaedd6e118508d3d26deba2a4e76476ada7e0e3965211be012002", + "https://bcr.bazel.build/modules/rules_cc/0.0.13/MODULE.bazel": "0e8529ed7b323dad0775ff924d2ae5af7640b23553dfcd4d34344c7e7a867191", + "https://bcr.bazel.build/modules/rules_cc/0.0.14/MODULE.bazel": "5e343a3aac88b8d7af3b1b6d2093b55c347b8eefc2e7d1442f7a02dc8fea48ac", + "https://bcr.bazel.build/modules/rules_cc/0.0.15/MODULE.bazel": "6704c35f7b4a72502ee81f61bf88706b54f06b3cbe5558ac17e2e14666cd5dcc", + "https://bcr.bazel.build/modules/rules_cc/0.0.16/MODULE.bazel": "7661303b8fc1b4d7f532e54e9d6565771fea666fbdf839e0a86affcd02defe87", + "https://bcr.bazel.build/modules/rules_cc/0.0.17/MODULE.bazel": "2ae1d8f4238ec67d7185d8861cb0a2cdf4bc608697c331b95bf990e69b62e64a", + "https://bcr.bazel.build/modules/rules_cc/0.0.2/MODULE.bazel": "6915987c90970493ab97393024c156ea8fb9f3bea953b2f3ec05c34f19b5695c", + "https://bcr.bazel.build/modules/rules_cc/0.0.6/MODULE.bazel": "abf360251023dfe3efcef65ab9d56beefa8394d4176dd29529750e1c57eaa33f", + "https://bcr.bazel.build/modules/rules_cc/0.0.8/MODULE.bazel": "964c85c82cfeb6f3855e6a07054fdb159aced38e99a5eecf7bce9d53990afa3e", + "https://bcr.bazel.build/modules/rules_cc/0.0.9/MODULE.bazel": "836e76439f354b89afe6a911a7adf59a6b2518fafb174483ad78a2a2fde7b1c5", + "https://bcr.bazel.build/modules/rules_cc/0.1.1/MODULE.bazel": "2f0222a6f229f0bf44cd711dc13c858dad98c62d52bd51d8fc3a764a83125513", + "https://bcr.bazel.build/modules/rules_cc/0.2.11/MODULE.bazel": "e94f24f065bf2191dba2dace951814378b66a94bb3bcc48077492fe0508059b5", + "https://bcr.bazel.build/modules/rules_cc/0.2.11/source.json": "4d555dc20c9c135b21b2e403cf0ce8393fb65711b2305979ce053df4ee3e78de", + "https://bcr.bazel.build/modules/rules_cc/0.2.8/MODULE.bazel": "f1df20f0bf22c28192a794f29b501ee2018fa37a3862a1a2132ae2940a23a642", + "https://bcr.bazel.build/modules/rules_foreign_cc/0.15.1/MODULE.bazel": "c2c60d26c79fda484acb95cdbec46e89d6b28b4845cb277160ce1e0c8622bb88", + "https://bcr.bazel.build/modules/rules_foreign_cc/0.15.1/source.json": "a161811a63ba8a859086da3b7ff3ad04f2e9c255d7727b41087103fc0eb22f55", + "https://bcr.bazel.build/modules/rules_foreign_cc/0.9.0/MODULE.bazel": "c9e8c682bf75b0e7c704166d79b599f93b72cfca5ad7477df596947891feeef6", + "https://bcr.bazel.build/modules/rules_fuzzing/0.5.2/MODULE.bazel": "40c97d1144356f52905566c55811f13b299453a14ac7769dfba2ac38192337a8", + "https://bcr.bazel.build/modules/rules_go/0.41.0/MODULE.bazel": "55861d8e8bb0e62cbd2896f60ff303f62ffcb0eddb74ecb0e5c0cbe36fc292c8", + "https://bcr.bazel.build/modules/rules_go/0.42.0/MODULE.bazel": "8cfa875b9aa8c6fce2b2e5925e73c1388173ea3c32a0db4d2b4804b453c14270", + "https://bcr.bazel.build/modules/rules_go/0.46.0/MODULE.bazel": "3477df8bdcc49e698b9d25f734c4f3a9f5931ff34ee48a2c662be168f5f2d3fd", + "https://bcr.bazel.build/modules/rules_go/0.50.1/MODULE.bazel": "b91a308dc5782bb0a8021ad4330c81fea5bda77f96b9e4c117b9b9c8f6665ee0", + "https://bcr.bazel.build/modules/rules_go/0.51.0-rc2/MODULE.bazel": "edfc3a9cea7bedb0eaaff37b0d7817c1a4bf72b3c615580b0ffcee6c52690fd4", + 
"https://bcr.bazel.build/modules/rules_go/0.51.0-rc2/source.json": "6b5cd0b3da2bd0e6949580851db990a04af0a285f072b9a0f059424457cd8cc9", + "https://bcr.bazel.build/modules/rules_java/4.0.0/MODULE.bazel": "5a78a7ae82cd1a33cef56dc578c7d2a46ed0dca12643ee45edbb8417899e6f74", + "https://bcr.bazel.build/modules/rules_java/5.3.5/MODULE.bazel": "a4ec4f2db570171e3e5eb753276ee4b389bae16b96207e9d3230895c99644b86", + "https://bcr.bazel.build/modules/rules_java/6.0.0/MODULE.bazel": "8a43b7df601a7ec1af61d79345c17b31ea1fedc6711fd4abfd013ea612978e39", + "https://bcr.bazel.build/modules/rules_java/6.3.0/MODULE.bazel": "a97c7678c19f236a956ad260d59c86e10a463badb7eb2eda787490f4c969b963", + "https://bcr.bazel.build/modules/rules_java/6.4.0/MODULE.bazel": "e986a9fe25aeaa84ac17ca093ef13a4637f6107375f64667a15999f77db6c8f6", + "https://bcr.bazel.build/modules/rules_java/6.5.2/MODULE.bazel": "1d440d262d0e08453fa0c4d8f699ba81609ed0e9a9a0f02cd10b3e7942e61e31", + "https://bcr.bazel.build/modules/rules_java/7.1.0/MODULE.bazel": "30d9135a2b6561c761bd67bd4990da591e6bdc128790ce3e7afd6a3558b2fb64", + "https://bcr.bazel.build/modules/rules_java/7.10.0/MODULE.bazel": "530c3beb3067e870561739f1144329a21c851ff771cd752a49e06e3dc9c2e71a", + "https://bcr.bazel.build/modules/rules_java/7.12.2/MODULE.bazel": "579c505165ee757a4280ef83cda0150eea193eed3bef50b1004ba88b99da6de6", + "https://bcr.bazel.build/modules/rules_java/7.2.0/MODULE.bazel": "06c0334c9be61e6cef2c8c84a7800cef502063269a5af25ceb100b192453d4ab", + "https://bcr.bazel.build/modules/rules_java/7.3.2/MODULE.bazel": "50dece891cfdf1741ea230d001aa9c14398062f2b7c066470accace78e412bc2", + "https://bcr.bazel.build/modules/rules_java/7.4.0/MODULE.bazel": "a592852f8a3dd539e82ee6542013bf2cadfc4c6946be8941e189d224500a8934", + "https://bcr.bazel.build/modules/rules_java/7.6.1/MODULE.bazel": "2f14b7e8a1aa2f67ae92bc69d1ec0fa8d9f827c4e17ff5e5f02e91caa3b2d0fe", + "https://bcr.bazel.build/modules/rules_java/8.13.0/MODULE.bazel": "0444ebf737d144cf2bb2ccb368e7f1cce735264285f2a3711785827c1686625e", + "https://bcr.bazel.build/modules/rules_java/8.13.0/source.json": "4605c0f676b87dd9d1fabd4d743b71f04d97503bd1a79aad53f87399fb5396de", + "https://bcr.bazel.build/modules/rules_java/8.3.2/MODULE.bazel": "7336d5511ad5af0b8615fdc7477535a2e4e723a357b6713af439fe8cf0195017", + "https://bcr.bazel.build/modules/rules_java/8.5.1/MODULE.bazel": "d8a9e38cc5228881f7055a6079f6f7821a073df3744d441978e7a43e20226939", + "https://bcr.bazel.build/modules/rules_java/8.6.0/MODULE.bazel": "9c064c434606d75a086f15ade5edb514308cccd1544c2b2a89bbac4310e41c71", + "https://bcr.bazel.build/modules/rules_java/8.6.1/MODULE.bazel": "f4808e2ab5b0197f094cabce9f4b006a27766beb6a9975931da07099560ca9c2", + "https://bcr.bazel.build/modules/rules_jvm_external/4.4.2/MODULE.bazel": "a56b85e418c83eb1839819f0b515c431010160383306d13ec21959ac412d2fe7", + "https://bcr.bazel.build/modules/rules_jvm_external/5.1/MODULE.bazel": "33f6f999e03183f7d088c9be518a63467dfd0be94a11d0055fe2d210f89aa909", + "https://bcr.bazel.build/modules/rules_jvm_external/5.2/MODULE.bazel": "d9351ba35217ad0de03816ef3ed63f89d411349353077348a45348b096615036", + "https://bcr.bazel.build/modules/rules_jvm_external/5.3/MODULE.bazel": "bf93870767689637164657731849fb887ad086739bd5d360d90007a581d5527d", + "https://bcr.bazel.build/modules/rules_jvm_external/6.1/MODULE.bazel": "75b5fec090dbd46cf9b7d8ea08cf84a0472d92ba3585b476f44c326eda8059c4", + "https://bcr.bazel.build/modules/rules_jvm_external/6.2/MODULE.bazel": "36a6e52487a855f33cb960724eb56547fa87e2c98a0474c3acad94339d7f8e99", + 
"https://bcr.bazel.build/modules/rules_jvm_external/6.3/MODULE.bazel": "c998e060b85f71e00de5ec552019347c8bca255062c990ac02d051bb80a38df0", + "https://bcr.bazel.build/modules/rules_jvm_external/6.6/MODULE.bazel": "153042249c7060536dc95b6bb9f9bb8063b8a0b0cb7acdb381bddbc2374aed55", + "https://bcr.bazel.build/modules/rules_jvm_external/6.7/MODULE.bazel": "e717beabc4d091ecb2c803c2d341b88590e9116b8bf7947915eeb33aab4f96dd", + "https://bcr.bazel.build/modules/rules_jvm_external/6.7/source.json": "5426f412d0a7fc6b611643376c7e4a82dec991491b9ce5cb1cfdd25fe2e92be4", + "https://bcr.bazel.build/modules/rules_kotlin/1.9.0/MODULE.bazel": "ef85697305025e5a61f395d4eaede272a5393cee479ace6686dba707de804d59", + "https://bcr.bazel.build/modules/rules_kotlin/1.9.5/MODULE.bazel": "043a16a572f610558ec2030db3ff0c9938574e7dd9f58bded1bb07c0192ef025", + "https://bcr.bazel.build/modules/rules_kotlin/1.9.6/MODULE.bazel": "d269a01a18ee74d0335450b10f62c9ed81f2321d7958a2934e44272fe82dcef3", + "https://bcr.bazel.build/modules/rules_kotlin/1.9.6/source.json": "2faa4794364282db7c06600b7e5e34867a564ae91bda7cae7c29c64e9466b7d5", + "https://bcr.bazel.build/modules/rules_license/0.0.3/MODULE.bazel": "627e9ab0247f7d1e05736b59dbb1b6871373de5ad31c3011880b4133cafd4bd0", + "https://bcr.bazel.build/modules/rules_license/0.0.7/MODULE.bazel": "088fbeb0b6a419005b89cf93fe62d9517c0a2b8bb56af3244af65ecfe37e7d5d", + "https://bcr.bazel.build/modules/rules_license/1.0.0/MODULE.bazel": "a7fda60eefdf3d8c827262ba499957e4df06f659330bbe6cdbdb975b768bb65c", + "https://bcr.bazel.build/modules/rules_license/1.0.0/source.json": "a52c89e54cc311196e478f8382df91c15f7a2bfdf4c6cd0e2675cc2ff0b56efb", + "https://bcr.bazel.build/modules/rules_pkg/0.7.0/MODULE.bazel": "df99f03fc7934a4737122518bb87e667e62d780b610910f0447665a7e2be62dc", + "https://bcr.bazel.build/modules/rules_pkg/1.0.1/MODULE.bazel": "5b1df97dbc29623bccdf2b0dcd0f5cb08e2f2c9050aab1092fd39a41e82686ff", + "https://bcr.bazel.build/modules/rules_pkg/1.0.1/source.json": "bd82e5d7b9ce2d31e380dd9f50c111d678c3bdaca190cb76b0e1c71b05e1ba8a", + "https://bcr.bazel.build/modules/rules_proto/4.0.0/MODULE.bazel": "a7a7b6ce9bee418c1a760b3d84f83a299ad6952f9903c67f19e4edd964894e06", + "https://bcr.bazel.build/modules/rules_proto/5.3.0-21.7/MODULE.bazel": "e8dff86b0971688790ae75528fe1813f71809b5afd57facb44dad9e8eca631b7", + "https://bcr.bazel.build/modules/rules_proto/6.0.0-rc1/MODULE.bazel": "1e5b502e2e1a9e825eef74476a5a1ee524a92297085015a052510b09a1a09483", + "https://bcr.bazel.build/modules/rules_proto/6.0.0/MODULE.bazel": "b531d7f09f58dce456cd61b4579ce8c86b38544da75184eadaf0a7cb7966453f", + "https://bcr.bazel.build/modules/rules_proto/6.0.2/MODULE.bazel": "ce916b775a62b90b61888052a416ccdda405212b6aaeb39522f7dc53431a5e73", + "https://bcr.bazel.build/modules/rules_proto/7.0.2/MODULE.bazel": "bf81793bd6d2ad89a37a40693e56c61b0ee30f7a7fdbaf3eabbf5f39de47dea2", + "https://bcr.bazel.build/modules/rules_proto/7.0.2/source.json": "1e5e7260ae32ef4f2b52fd1d0de8d03b606a44c91b694d2f1afb1d3b28a48ce1", + "https://bcr.bazel.build/modules/rules_python/0.10.2/MODULE.bazel": "cc82bc96f2997baa545ab3ce73f196d040ffb8756fd2d66125a530031cd90e5f", + "https://bcr.bazel.build/modules/rules_python/0.23.1/MODULE.bazel": "49ffccf0511cb8414de28321f5fcf2a31312b47c40cc21577144b7447f2bf300", + "https://bcr.bazel.build/modules/rules_python/0.25.0/MODULE.bazel": "72f1506841c920a1afec76975b35312410eea3aa7b63267436bfb1dd91d2d382", + "https://bcr.bazel.build/modules/rules_python/0.28.0/MODULE.bazel": 
"cba2573d870babc976664a912539b320cbaa7114cd3e8f053c720171cde331ed", + "https://bcr.bazel.build/modules/rules_python/0.31.0/MODULE.bazel": "93a43dc47ee570e6ec9f5779b2e64c1476a6ce921c48cc9a1678a91dd5f8fd58", + "https://bcr.bazel.build/modules/rules_python/0.33.2/MODULE.bazel": "3e036c4ad8d804a4dad897d333d8dce200d943df4827cb849840055be8d2e937", + "https://bcr.bazel.build/modules/rules_python/0.37.1/MODULE.bazel": "3faeb2d9fa0a81f8980643ee33f212308f4d93eea4b9ce6f36d0b742e71e9500", + "https://bcr.bazel.build/modules/rules_python/0.37.2/MODULE.bazel": "b5ffde91410745750b6c13be1c5dc4555ef5bc50562af4a89fd77807fdde626a", + "https://bcr.bazel.build/modules/rules_python/0.4.0/MODULE.bazel": "9208ee05fd48bf09ac60ed269791cf17fb343db56c8226a720fbb1cdf467166c", + "https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": "9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7", + "https://bcr.bazel.build/modules/rules_python/1.0.0/MODULE.bazel": "898a3d999c22caa585eb062b600f88654bf92efb204fa346fb55f6f8edffca43", + "https://bcr.bazel.build/modules/rules_python/1.2.0/MODULE.bazel": "5aeeb48b2a6c19d668b48adf2b8a2b209a6310c230db0ce77450f148a89846e4", + "https://bcr.bazel.build/modules/rules_python/1.6.3/MODULE.bazel": "a7b80c42cb3de5ee2a5fa1abc119684593704fcd2fec83165ebe615dec76574f", + "https://bcr.bazel.build/modules/rules_python/1.6.3/source.json": "f0be74977e5604a6526c8a416cda22985093ff7d5d380d41722d7e44015cc419", + "https://bcr.bazel.build/modules/rules_robolectric/4.14.1.2/MODULE.bazel": "d44fec647d0aeb67b9f3b980cf68ba634976f3ae7ccd6c07d790b59b87a4f251", + "https://bcr.bazel.build/modules/rules_robolectric/4.14.1.2/source.json": "37c10335f2361c337c5c1f34ed36d2da70534c23088062b33a8bdaab68aa9dea", + "https://bcr.bazel.build/modules/rules_shell/0.1.2/MODULE.bazel": "66e4ca3ce084b04af0b9ff05ff14cab4e5df7503973818bb91cbc6cda08d32fc", + "https://bcr.bazel.build/modules/rules_shell/0.2.0/MODULE.bazel": "fda8a652ab3c7d8fee214de05e7a9916d8b28082234e8d2c0094505c5268ed3c", + "https://bcr.bazel.build/modules/rules_shell/0.3.0/MODULE.bazel": "de4402cd12f4cc8fda2354fce179fdb068c0b9ca1ec2d2b17b3e21b24c1a937b", + "https://bcr.bazel.build/modules/rules_shell/0.6.1/MODULE.bazel": "72e76b0eea4e81611ef5452aa82b3da34caca0c8b7b5c0c9584338aa93bae26b", + "https://bcr.bazel.build/modules/rules_shell/0.6.1/source.json": "20ec05cd5e592055e214b2da8ccb283c7f2a421ea0dc2acbf1aa792e11c03d0c", + "https://bcr.bazel.build/modules/rules_swift/1.16.0/MODULE.bazel": "4a09f199545a60d09895e8281362b1ff3bb08bbde69c6fc87aff5b92fcc916ca", + "https://bcr.bazel.build/modules/rules_swift/2.1.1/MODULE.bazel": "494900a80f944fc7aa61500c2073d9729dff0b764f0e89b824eb746959bc1046", + "https://bcr.bazel.build/modules/rules_swift/2.1.1/source.json": "40fc69dfaac64deddbb75bd99cdac55f4427d9ca0afbe408576a65428427a186", + "https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8", + "https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c", + "https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef", + "https://bcr.bazel.build/modules/stardoc/0.6.2/MODULE.bazel": "7060193196395f5dd668eda046ccbeacebfd98efc77fed418dbe2b82ffaa39fd", + "https://bcr.bazel.build/modules/stardoc/0.7.0/MODULE.bazel": "05e3d6d30c099b6770e97da986c53bd31844d7f13d41412480ea265ac9e8079c", + "https://bcr.bazel.build/modules/stardoc/0.7.1/MODULE.bazel": 
"3548faea4ee5dda5580f9af150e79d0f6aea934fc60c1cc50f4efdd9420759e7", + "https://bcr.bazel.build/modules/stardoc/0.7.2/MODULE.bazel": "fc152419aa2ea0f51c29583fab1e8c99ddefd5b3778421845606ee628629e0e5", + "https://bcr.bazel.build/modules/stardoc/0.7.2/source.json": "58b029e5e901d6802967754adf0a9056747e8176f017cfe3607c0851f4d42216", + "https://bcr.bazel.build/modules/swift_argument_parser/1.3.1.1/MODULE.bazel": "5e463fbfba7b1701d957555ed45097d7f984211330106ccd1352c6e0af0dcf91", + "https://bcr.bazel.build/modules/swift_argument_parser/1.3.1.1/source.json": "32bd87e5f4d7acc57c5b2ff7c325ae3061d5e242c0c4c214ae87e0f1c13e54cb", + "https://bcr.bazel.build/modules/upb/0.0.0-20220923-a547704/MODULE.bazel": "7298990c00040a0e2f121f6c32544bab27d4452f80d9ce51349b1a28f3005c43", + "https://bcr.bazel.build/modules/upb/0.0.0-20230516-61a97ef/MODULE.bazel": "c0df5e35ad55e264160417fd0875932ee3c9dda63d9fccace35ac62f45e1b6f9", + "https://bcr.bazel.build/modules/zlib/1.2.11/MODULE.bazel": "07b389abc85fdbca459b69e2ec656ae5622873af3f845e1c9d80fe179f3effa0", + "https://bcr.bazel.build/modules/zlib/1.2.12/MODULE.bazel": "3b1a8834ada2a883674be8cbd36ede1b6ec481477ada359cd2d3ddc562340b27", + "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.3/MODULE.bazel": "af322bc08976524477c79d1e45e241b6efbeb918c497e8840b8ab116802dda79", + "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.5/MODULE.bazel": "eec517b5bbe5492629466e11dae908d043364302283de25581e3eb944326c4ca", + "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.5/source.json": "22bc55c47af97246cfc093d0acf683a7869377de362b5d1c552c2c2e16b7a806", + "https://bcr.bazel.build/modules/zlib/1.3.1/MODULE.bazel": "751c9940dcfe869f5f7274e1295422a34623555916eb98c174c1e945594bf198" + }, + "selectedYankedVersions": {}, + "moduleExtensions": { + "//:extensions.bzl%llvm_deps_extension": { + "general": { + "bzlTransitiveDigest": "LGeZ4Ibt22AGXloFt/bm3EsBB05m6aTG+WxfH8fJVB4=", + "usagesDigest": "dHBLC1g5cqg/flxcuZRJMp2heDoB4+0/NDd6MutLhGE=", + "recordedFileInputs": {}, + "recordedDirentsInputs": {}, + "envVariables": {}, + "generatedRepoSpecs": { + "llvm-raw": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:local.bzl%new_local_repository", + "attributes": { + "build_file_content": "# empty", + "path": "../../" + } + }, + "llvm_zlib": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:zlib-ng.BUILD", + "sha256": "e36bb346c00472a1f9ff2a0a4643e590a254be6379da7cddd9daeb9a7f296731", + "strip_prefix": "zlib-ng-2.0.7", + "urls": [ + "https://github.com/zlib-ng/zlib-ng/archive/refs/tags/2.0.7.zip" + ] + } + }, + "vulkan_headers": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:vulkan_headers.BUILD", + "sha256": "19f491784ef0bc73caff877d11c96a48b946b5a1c805079d9006e3fbaa5c1895", + "strip_prefix": "Vulkan-Headers-9bd3f561bcee3f01d22912de10bb07ce4e23d378", + "urls": [ + "https://github.com/KhronosGroup/Vulkan-Headers/archive/9bd3f561bcee3f01d22912de10bb07ce4e23d378.tar.gz" + ] + } + }, + "vulkan_sdk_setup": { + "repoRuleId": "@@//:vulkan_sdk.bzl%vulkan_sdk_setup", + "attributes": {} + }, + "gmp": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "urls": [ + "https://gmplib.org/download/gmp/gmp-6.2.1.tar.xz", + "https://ftp.gnu.org/gnu/gmp/gmp-6.2.1.tar.xz" + ], + "build_file": 
"@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:gmp.BUILD", + "sha256": "fd4829912cddd12f84181c3451cc752be224643e87fac497b69edddadc49b4f2", + "strip_prefix": "gmp-6.2.1" + } + }, + "mpfr": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "urls": [ + "https://www.mpfr.org/mpfr-current/mpfr-4.2.2.tar.gz" + ], + "sha256": "826cbb24610bd193f36fde172233fb8c009f3f5c2ad99f644d0dea2e16a20e42", + "strip_prefix": "mpfr-4.2.2", + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:mpfr.BUILD" + } + }, + "mpc": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "urls": [ + "https://ftp.gnu.org/gnu/mpc/mpc-1.3.1.tar.gz" + ], + "sha256": "ab642492f5cf882b74aa0cb730cd410a81edcdbec895183ce930e706c1c759b8", + "strip_prefix": "mpc-1.3.1", + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:mpc.BUILD" + } + }, + "pfm": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "urls": [ + "https://versaweb.dl.sourceforge.net/project/perfmon2/libpfm4/libpfm-4.13.0.tar.gz" + ], + "sha256": "d18b97764c755528c1051d376e33545d0eb60c6ebf85680436813fa5b04cc3d1", + "strip_prefix": "libpfm-4.13.0", + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:pfm.BUILD" + } + }, + "llvm_zstd": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:zstd.BUILD", + "sha256": "7c42d56fac126929a6a85dbc73ff1db2411d04f104fae9bdea51305663a83fd0", + "strip_prefix": "zstd-1.5.2", + "urls": [ + "https://github.com/facebook/zstd/releases/download/v1.5.2/zstd-1.5.2.tar.gz" + ] + } + }, + "pybind11": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "url": "https://github.com/pybind/pybind11/archive/v2.10.3.zip", + "sha256": "201966a61dc826f1b1879a24a3317a1ec9214a918c8eb035be2f30c3e9cfbdcb", + "strip_prefix": "pybind11-2.10.3", + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:pybind.BUILD" + } + }, + "pyyaml": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "url": "https://github.com/yaml/pyyaml/archive/refs/tags/5.1.zip", + "sha256": "f0a35d7f282a6d6b1a4f3f3965ef5c124e30ed27a0088efb97c0977268fd671f", + "strip_prefix": "pyyaml-5.1/lib3", + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:pyyaml.BUILD" + } + }, + "robin_map": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:robin_map.BUILD", + "sha256": "a8424ad3b0affd4c57ed26f0f3d8a29604f0e1f2ef2089f497f614b1c94c7236", + "strip_prefix": "robin-map-1.3.0", + "url": "https://github.com/Tessil/robin-map/archive/refs/tags/v1.3.0.tar.gz" + } + }, + "nanobind": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "build_file": "@@+llvm_deps_extension+llvm-raw//utils/bazel/third_party_build:nanobind.BUILD", + "sha256": "8ce3667dce3e64fc06bfb9b778b6f48731482362fb89a43da156632266cd5a90", + "strip_prefix": "nanobind-2.9.2", + "url": "https://github.com/wjakob/nanobind/archive/refs/tags/v2.9.2.tar.gz" + } + } + }, + "recordedRepoMappingEntries": [ + [ + "", + "bazel_tools", + "bazel_tools" + ] + ] + } + }, + 
"@@rules_android+//rules/android_sdk_repository:rule.bzl%android_sdk_repository_extension": { + "general": { + "bzlTransitiveDigest": "NAy+0M15JNVEBb8Tny6t7j3lKqTnsAMjoBB6LJ+C370=", + "usagesDigest": "g9Ur6X6qhf9a8MmY9qXU/jFjkyk/aZVBegI0yVMF0z4=", + "recordedFileInputs": {}, + "recordedDirentsInputs": {}, + "envVariables": {}, + "generatedRepoSpecs": { + "androidsdk": { + "repoRuleId": "@@rules_android+//rules/android_sdk_repository:rule.bzl%_android_sdk_repository", + "attributes": {} + } + }, + "recordedRepoMappingEntries": [] + } + }, + "@@rules_kotlin+//src/main/starlark/core/repositories:bzlmod_setup.bzl%rules_kotlin_extensions": { + "general": { + "bzlTransitiveDigest": "sFhcgPbDQehmbD1EOXzX4H1q/CD5df8zwG4kp4jbvr8=", + "usagesDigest": "QI2z8ZUR+mqtbwsf2fLqYdJAkPOHdOV+tF2yVAUgRzw=", + "recordedFileInputs": {}, + "recordedDirentsInputs": {}, + "envVariables": {}, + "generatedRepoSpecs": { + "com_github_jetbrains_kotlin_git": { + "repoRuleId": "@@rules_kotlin+//src/main/starlark/core/repositories:compiler.bzl%kotlin_compiler_git_repository", + "attributes": { + "urls": [ + "https://github.com/JetBrains/kotlin/releases/download/v1.9.23/kotlin-compiler-1.9.23.zip" + ], + "sha256": "93137d3aab9afa9b27cb06a824c2324195c6b6f6179d8a8653f440f5bd58be88" + } + }, + "com_github_jetbrains_kotlin": { + "repoRuleId": "@@rules_kotlin+//src/main/starlark/core/repositories:compiler.bzl%kotlin_capabilities_repository", + "attributes": { + "git_repository_name": "com_github_jetbrains_kotlin_git", + "compiler_version": "1.9.23" + } + }, + "com_github_google_ksp": { + "repoRuleId": "@@rules_kotlin+//src/main/starlark/core/repositories:ksp.bzl%ksp_compiler_plugin_repository", + "attributes": { + "urls": [ + "https://github.com/google/ksp/releases/download/1.9.23-1.0.20/artifacts.zip" + ], + "sha256": "ee0618755913ef7fd6511288a232e8fad24838b9af6ea73972a76e81053c8c2d", + "strip_version": "1.9.23-1.0.20" + } + }, + "com_github_pinterest_ktlint": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_file", + "attributes": { + "sha256": "01b2e0ef893383a50dbeb13970fe7fa3be36ca3e83259e01649945b09d736985", + "urls": [ + "https://github.com/pinterest/ktlint/releases/download/1.3.0/ktlint" + ], + "executable": true + } + }, + "rules_android": { + "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive", + "attributes": { + "sha256": "cd06d15dd8bb59926e4d65f9003bfc20f9da4b2519985c27e190cddc8b7a7806", + "strip_prefix": "rules_android-0.1.1", + "urls": [ + "https://github.com/bazelbuild/rules_android/archive/v0.1.1.zip" + ] + } + } + }, + "recordedRepoMappingEntries": [ + [ + "rules_kotlin+", + "bazel_tools", + "bazel_tools" + ] + ] + } + }, + "@@rules_python+//python/uv:uv.bzl%uv": { + "general": { + "bzlTransitiveDigest": "477hS4MXeJ7LqPNLTqL+1ltraV5lqwOw3tEXWqnJRt8=", + "usagesDigest": "icnInV8HDGrRQf9x8RMfxWfBHgT3OgRlYovS/9POEJw=", + "recordedFileInputs": {}, + "recordedDirentsInputs": {}, + "envVariables": {}, + "generatedRepoSpecs": { + "uv": { + "repoRuleId": "@@rules_python+//python/uv/private:uv_toolchains_repo.bzl%uv_toolchains_repo", + "attributes": { + "toolchain_type": "'@@rules_python+//python/uv:uv_toolchain_type'", + "toolchain_names": [ + "none" + ], + "toolchain_implementations": { + "none": "'@@rules_python+//python:none'" + }, + "toolchain_compatible_with": { + "none": [ + "@platforms//:incompatible" + ] + }, + "toolchain_target_settings": {} + } + } + }, + "recordedRepoMappingEntries": [ + [ + "rules_python+", + "bazel_tools", + "bazel_tools" + ] + ] + } + } + } +} 
diff --git a/utils/bazel/extensions.bzl b/utils/bazel/extensions.bzl new file mode 100644 index 0000000000000..b0d5871b722a7 --- /dev/null +++ b/utils/bazel/extensions.bzl @@ -0,0 +1,127 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +"""bzlmod extensions for llvm-project""" + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +load("@bazel_tools//tools/build_defs/repo:local.bzl", "new_local_repository") +load(":vulkan_sdk.bzl", "vulkan_sdk_setup") + +def _llvm_repos_extension_impl(module_ctx): + if any([m.is_root and m.name == "llvm-project-overlay" for m in module_ctx.modules]): + new_local_repository( + name = "llvm-raw", + build_file_content = "# empty", + path = "../../", + ) + + http_archive( + name = "llvm_zlib", + build_file = "@llvm-raw//utils/bazel/third_party_build:zlib-ng.BUILD", + sha256 = "e36bb346c00472a1f9ff2a0a4643e590a254be6379da7cddd9daeb9a7f296731", + strip_prefix = "zlib-ng-2.0.7", + urls = [ + "https://github.com/zlib-ng/zlib-ng/archive/refs/tags/2.0.7.zip", + ], + ) + + http_archive( + name = "vulkan_headers", + build_file = "@llvm-raw//utils/bazel/third_party_build:vulkan_headers.BUILD", + sha256 = "19f491784ef0bc73caff877d11c96a48b946b5a1c805079d9006e3fbaa5c1895", + strip_prefix = "Vulkan-Headers-9bd3f561bcee3f01d22912de10bb07ce4e23d378", + urls = [ + "https://github.com/KhronosGroup/Vulkan-Headers/archive/9bd3f561bcee3f01d22912de10bb07ce4e23d378.tar.gz", + ], + ) + + vulkan_sdk_setup(name = "vulkan_sdk_setup") + + http_archive( + name = "gmp", + urls = [ + "https://gmplib.org/download/gmp/gmp-6.2.1.tar.xz", + "https://ftp.gnu.org/gnu/gmp/gmp-6.2.1.tar.xz", + ], + build_file = "@llvm-raw//utils/bazel/third_party_build:gmp.BUILD", + sha256 = "fd4829912cddd12f84181c3451cc752be224643e87fac497b69edddadc49b4f2", + strip_prefix = "gmp-6.2.1", + ) + + http_archive( + name = "mpfr", + urls = [ + "https://www.mpfr.org/mpfr-current/mpfr-4.2.2.tar.gz", + ], + sha256 = "826cbb24610bd193f36fde172233fb8c009f3f5c2ad99f644d0dea2e16a20e42", + strip_prefix = "mpfr-4.2.2", + build_file = "@llvm-raw//utils/bazel/third_party_build:mpfr.BUILD", + ) + + http_archive( + name = "mpc", + urls = [ + "https://ftp.gnu.org/gnu/mpc/mpc-1.3.1.tar.gz", + ], + sha256 = "ab642492f5cf882b74aa0cb730cd410a81edcdbec895183ce930e706c1c759b8", + strip_prefix = "mpc-1.3.1", + build_file = "@llvm-raw//utils/bazel/third_party_build:mpc.BUILD", + ) + + http_archive( + name = "pfm", + urls = [ + "https://versaweb.dl.sourceforge.net/project/perfmon2/libpfm4/libpfm-4.13.0.tar.gz", + ], + sha256 = "d18b97764c755528c1051d376e33545d0eb60c6ebf85680436813fa5b04cc3d1", + strip_prefix = "libpfm-4.13.0", + build_file = "@llvm-raw//utils/bazel/third_party_build:pfm.BUILD", + ) + + http_archive( + name = "llvm_zstd", + build_file = "@llvm-raw//utils/bazel/third_party_build:zstd.BUILD", + sha256 = "7c42d56fac126929a6a85dbc73ff1db2411d04f104fae9bdea51305663a83fd0", + strip_prefix = "zstd-1.5.2", + urls = [ + "https://github.com/facebook/zstd/releases/download/v1.5.2/zstd-1.5.2.tar.gz", + ], + ) + + http_archive( + name = "pybind11", + url = "https://github.com/pybind/pybind11/archive/v2.10.3.zip", + sha256 = "201966a61dc826f1b1879a24a3317a1ec9214a918c8eb035be2f30c3e9cfbdcb", + strip_prefix = "pybind11-2.10.3", + build_file = "@llvm-raw//utils/bazel/third_party_build:pybind.BUILD", + ) + + http_archive( + name = "pyyaml", + url = 
"https://github.com/yaml/pyyaml/archive/refs/tags/5.1.zip", + sha256 = "f0a35d7f282a6d6b1a4f3f3965ef5c124e30ed27a0088efb97c0977268fd671f", + strip_prefix = "pyyaml-5.1/lib3", + build_file = "@llvm-raw//utils/bazel/third_party_build:pyyaml.BUILD", + ) + + # TODO: bump to robin-map-1.4.0 + http_archive( + name = "robin_map", + build_file = "@llvm-raw//utils/bazel/third_party_build:robin_map.BUILD", + sha256 = "a8424ad3b0affd4c57ed26f0f3d8a29604f0e1f2ef2089f497f614b1c94c7236", + strip_prefix = "robin-map-1.3.0", + url = "https://github.com/Tessil/robin-map/archive/refs/tags/v1.3.0.tar.gz", + ) + + http_archive( + name = "nanobind", + build_file = "@llvm-raw//utils/bazel/third_party_build:nanobind.BUILD", + sha256 = "8ce3667dce3e64fc06bfb9b778b6f48731482362fb89a43da156632266cd5a90", + strip_prefix = "nanobind-2.9.2", + url = "https://github.com/wjakob/nanobind/archive/refs/tags/v2.9.2.tar.gz", + ) + +llvm_repos_extension = module_extension( + implementation = _llvm_repos_extension_impl, +) diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index deb56dc0957e9..790709bdef05c 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -1025,6 +1025,7 @@ cc_library( gentbl_cc_library( name = "sema_attr_gen", tbl_outs = { + "include/clang/Sema/AttrIsTypeDependent.inc": ["-gen-clang-attr-is-type-dependent"], "include/clang/Sema/AttrParsedAttrImpl.inc": ["-gen-clang-attr-parsed-attr-impl"], "include/clang/Sema/AttrParsedAttrKinds.inc": ["-gen-clang-attr-parsed-attr-kinds"], "include/clang/Sema/AttrSpellingListIndex.inc": ["-gen-clang-attr-spelling-index"], diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 635f77215b38f..ddad2f4f7611d 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -4100,6 +4100,7 @@ cc_library( ":DebugInfo", ":DebugInfoDWARF", ":JITLink", + ":Object", ":OrcJIT", ":OrcShared", ":Support",